From a71c9c132855c2ae33f8df3c243a86c4bb98f171 Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Tue, 10 Mar 2026 21:31:42 -0700 Subject: [PATCH 1/6] feat: add agent-supervisor-api template Adds a new app template that uses the Databricks Supervisor API (AI Gateway at /mlflow/v1/responses) for server-side tool execution, instead of managing an agent loop with the OpenAI Agents SDK. Key implementation detail: DatabricksOpenAI defaults to {host}/serving-endpoints, but the Supervisor API is at {host}/mlflow/v1/responses. _get_client() overrides base_url to point to the correct AI Gateway endpoint. Includes unit tests (9 passing) and integration test stubs for the live staging endpoint. Signed-off-by: Sid Murching --- .scripts/templates.py | 4 + .../.claude/skills/add-tools/SKILL.md | 84 ++ .../add-tools/examples/custom-mcp-server.md | 58 ++ .../skills/add-tools/examples/experiment.yaml | 8 + .../add-tools/examples/genie-space.yaml | 9 + .../add-tools/examples/serving-endpoint.yaml | 7 + .../add-tools/examples/sql-warehouse.yaml | 7 + .../add-tools/examples/uc-connection.yaml | 9 + .../add-tools/examples/uc-function.yaml | 9 + .../add-tools/examples/vector-search.yaml | 9 + .../.claude/skills/agent-memory/SKILL.md | 176 ++++ .../.claude/skills/deploy/SKILL.md | 232 +++++ .../.claude/skills/discover-tools/SKILL.md | 47 + .../.claude/skills/lakebase-setup/SKILL.md | 392 +++++++ .../migrate-from-model-serving/SKILL.md | 965 ++++++++++++++++++ .../.claude/skills/modify-agent/SKILL.md | 147 +++ .../.claude/skills/quickstart/SKILL.md | 83 ++ .../.claude/skills/run-locally/SKILL.md | 90 ++ agent-supervisor-api/AGENTS.md | 115 +++ agent-supervisor-api/CLAUDE.md | 1 + agent-supervisor-api/README.md | 101 ++ agent-supervisor-api/agent_server/__init__.py | 0 agent-supervisor-api/agent_server/agent.py | 72 ++ .../agent_server/evaluate_agent.py | 100 ++ .../agent_server/start_server.py | 17 + agent-supervisor-api/agent_server/utils.py | 9 + agent-supervisor-api/app.yaml | 16 + agent-supervisor-api/databricks.yml | 51 + agent-supervisor-api/pyproject.toml | 36 + agent-supervisor-api/requirements.txt | 1 + agent-supervisor-api/scripts/__init__.py | 0 .../scripts/discover_tools.py | 432 ++++++++ agent-supervisor-api/scripts/quickstart.py | 768 ++++++++++++++ agent-supervisor-api/scripts/start_app.py | 332 ++++++ agent-supervisor-api/tests/__init__.py | 0 agent-supervisor-api/tests/test_agent.py | 214 ++++ 36 files changed, 4601 insertions(+) create mode 100644 agent-supervisor-api/.claude/skills/add-tools/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml create mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml create mode 100644 agent-supervisor-api/.claude/skills/agent-memory/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/deploy/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/discover-tools/SKILL.md create mode 100644 
agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/modify-agent/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/quickstart/SKILL.md create mode 100644 agent-supervisor-api/.claude/skills/run-locally/SKILL.md create mode 100644 agent-supervisor-api/AGENTS.md create mode 100644 agent-supervisor-api/CLAUDE.md create mode 100644 agent-supervisor-api/README.md create mode 100644 agent-supervisor-api/agent_server/__init__.py create mode 100644 agent-supervisor-api/agent_server/agent.py create mode 100644 agent-supervisor-api/agent_server/evaluate_agent.py create mode 100644 agent-supervisor-api/agent_server/start_server.py create mode 100644 agent-supervisor-api/agent_server/utils.py create mode 100644 agent-supervisor-api/app.yaml create mode 100644 agent-supervisor-api/databricks.yml create mode 100644 agent-supervisor-api/pyproject.toml create mode 100644 agent-supervisor-api/requirements.txt create mode 100644 agent-supervisor-api/scripts/__init__.py create mode 100755 agent-supervisor-api/scripts/discover_tools.py create mode 100644 agent-supervisor-api/scripts/quickstart.py create mode 100644 agent-supervisor-api/scripts/start_app.py create mode 100644 agent-supervisor-api/tests/__init__.py create mode 100644 agent-supervisor-api/tests/test_agent.py diff --git a/.scripts/templates.py b/.scripts/templates.py index edf7f549..77faeacd 100644 --- a/.scripts/templates.py +++ b/.scripts/templates.py @@ -38,4 +38,8 @@ "sdk": ["langgraph", "openai"], "bundle_name": "agent_migration", }, + "agent-supervisor-api": { + "sdk": "openai", + "bundle_name": "agent_supervisor_api", + }, } diff --git a/agent-supervisor-api/.claude/skills/add-tools/SKILL.md b/agent-supervisor-api/.claude/skills/add-tools/SKILL.md new file mode 100644 index 00000000..e07b5279 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/SKILL.md @@ -0,0 +1,84 @@ +--- +name: add-tools +description: "Add tools to your agent and grant required permissions in databricks.yml. Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources." +--- + +# Add Tools & Grant Permissions + +> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks --profile ` + +**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.** + +Without this, you'll get permission errors when the agent tries to use the resource. 
+
+## Workflow
+
+**Step 1:** Add MCP server in `agent_server/agent.py`:
+```python
+from databricks_openai.agents import McpServer
+
+genie_server = McpServer(
+    url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef",
+    name="my genie space",
+)
+
+agent = Agent(
+    name="my agent",
+    model="databricks-claude-3-7-sonnet",
+    mcp_servers=[genie_server],
+)
+```
+
+**Step 2:** Grant access in `databricks.yml`:
+```yaml
+resources:
+  apps:
+    agent_supervisor_api:
+      resources:
+        - name: 'my_genie_space'
+          genie_space:
+            name: 'My Genie Space'
+            space_id: '01234567-89ab-cdef'
+            permission: 'CAN_RUN'
+```
+
+**Step 3:** Deploy with `databricks bundle deploy` (see **deploy** skill)
+
+## Resource Type Examples
+
+See the `examples/` directory for complete YAML snippets:
+
+| File | Resource Type | When to Use |
+|------|--------------|-------------|
+| `uc-function.yaml` | Unity Catalog function | UC functions |
+| `uc-connection.yaml` | UC connection | External MCP servers |
+| `vector-search.yaml` | Vector search index | RAG applications |
+| `sql-warehouse.yaml` | SQL warehouse | SQL execution |
+| `serving-endpoint.yaml` | Model serving endpoint | Model inference |
+| `genie-space.yaml` | Genie space | Natural language data |
+| `experiment.yaml` | MLflow experiment | Tracing (already configured) |
+| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` |
+
+## Custom MCP Servers (Databricks Apps)
+
+Apps are **not yet supported** as resource dependencies in `databricks.yml`. A manual permission grant is required:
+
+**Step 1:** Get your agent app's service principal:
+```bash
+databricks apps get <your-agent-app-name> --output json | jq -r '.service_principal_name'
+```
+
+**Step 2:** Grant permission on the MCP server app:
+```bash
+databricks apps update-permissions <mcp-server-app-name> \
+  --json '{"access_control_list": [{"service_principal_name": "<service-principal-name>", "permission_level": "CAN_USE"}]}'
+```
+
+See `examples/custom-mcp-server.md` for detailed steps.
+
+## Important Notes
+
+- **MLflow experiment**: Already configured in template, no action needed
+- **Multiple resources**: Add multiple entries under the `resources:` list
+- **Permission types vary**: Each resource type has specific permission values
+- **Deploy after changes**: Run `databricks bundle deploy` after modifying `databricks.yml`
diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md b/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md
new file mode 100644
index 00000000..86b45858
--- /dev/null
+++ b/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md
@@ -0,0 +1,58 @@
+# Custom MCP Server (Databricks App)
+
+Custom MCP servers are Databricks Apps with names starting with `mcp-*`.
+
+**Apps are not yet supported as resource dependencies in `databricks.yml`**, so a manual permission grant is required.
+
+## Steps
+
+### 1. Add MCP server in `agent_server/agent.py`
+
+```python
+from databricks_openai.agents import McpServer
+
+custom_mcp = McpServer(
+    url="https://mcp-my-server.cloud.databricks.com/mcp",
+    name="my custom mcp server",
+)
+
+agent = Agent(
+    name="my agent",
+    model="databricks-claude-3-7-sonnet",
+    mcp_servers=[custom_mcp],
+)
+```
+
+### 2. Deploy your agent app first
+
+```bash
+databricks bundle deploy
+databricks bundle run <app-resource-key>  # from databricks.yml resources.apps.*
+```
+
+### 3. Get your agent app's service principal
+
+```bash
+databricks apps get <your-agent-app-name> --output json | jq -r '.service_principal_name'
+```
+
+Example output: `sp-abc123-def456`
+
+### 4.
Grant permission on the MCP server app + +```bash +databricks apps update-permissions \ + --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' +``` + +Example: +```bash +databricks apps update-permissions mcp-my-server \ + --json '{"access_control_list": [{"service_principal_name": "sp-abc123-def456", "permission_level": "CAN_USE"}]}' +``` + +## Notes + +- This manual step is required each time you connect to a new custom MCP server +- The permission grant persists across deployments +- If you redeploy the agent app with a new service principal, you'll need to grant permissions again diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml new file mode 100644 index 00000000..ac5c626a --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml @@ -0,0 +1,8 @@ +# MLflow Experiment +# Use for: Tracing and model logging +# Note: Already configured in template's databricks.yml + +- name: 'my_experiment' + experiment: + experiment_id: '12349876' + permission: 'CAN_MANAGE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml new file mode 100644 index 00000000..71589d52 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml @@ -0,0 +1,9 @@ +# Genie Space +# Use for: Natural language interface to data +# MCP URL: {host}/api/2.0/mcp/genie/{space_id} + +- name: 'my_genie_space' + genie_space: + name: 'My Genie Space' + space_id: '01234567-89ab-cdef' + permission: 'CAN_RUN' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml new file mode 100644 index 00000000..b49ce9da --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml @@ -0,0 +1,7 @@ +# Model Serving Endpoint +# Use for: Model inference endpoints + +- name: 'my_endpoint' + serving_endpoint: + name: 'my_endpoint' + permission: 'CAN_QUERY' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml new file mode 100644 index 00000000..a6ce9446 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml @@ -0,0 +1,7 @@ +# SQL Warehouse +# Use for: SQL query execution + +- name: 'my_warehouse' + sql_warehouse: + sql_warehouse_id: 'abc123def456' + permission: 'CAN_USE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml new file mode 100644 index 00000000..316675fe --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml @@ -0,0 +1,9 @@ +# Unity Catalog Connection +# Use for: External MCP servers via UC connections +# MCP URL: {host}/api/2.0/mcp/external/{connection_name} + +- name: 'my_connection' + uc_securable: + securable_full_name: 'my-connection-name' + securable_type: 'CONNECTION' + permission: 'USE_CONNECTION' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml new file mode 100644 index 00000000..43f938a9 --- /dev/null +++ 
b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml @@ -0,0 +1,9 @@ +# Unity Catalog Function +# Use for: UC functions accessed via MCP server +# MCP URL: {host}/api/2.0/mcp/functions/{catalog}/{schema}/{function_name} + +- name: 'my_uc_function' + uc_securable: + securable_full_name: 'catalog.schema.function_name' + securable_type: 'FUNCTION' + permission: 'EXECUTE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml new file mode 100644 index 00000000..0ba39027 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml @@ -0,0 +1,9 @@ +# Vector Search Index +# Use for: RAG applications with unstructured data +# MCP URL: {host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index_name} + +- name: 'my_vector_index' + uc_securable: + securable_full_name: 'catalog.schema.index_name' + securable_type: 'TABLE' + permission: 'SELECT' diff --git a/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md b/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md new file mode 100644 index 00000000..896a8baa --- /dev/null +++ b/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md @@ -0,0 +1,176 @@ +--- +name: agent-openai-memory +description: "Add memory capabilities to your agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage." +--- + +# Stateful Memory with OpenAI Agents SDK Sessions + +This template uses OpenAI Agents SDK [Sessions](https://openai.github.io/openai-agents-python/sessions/) with `AsyncDatabricksSession` to persist conversation history to a Databricks Lakebase instance. + +## How Sessions Work + +Sessions automatically manage conversation history for multi-turn interactions: + +1. **Before each run**: The session retrieves prior conversation history and prepends it to input +2. **During the run**: New items (user messages, responses, tool calls) are generated +3. **After each run**: All new items are automatically stored in the session + +This eliminates the need to manually manage conversation state between runs. 
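+
+For illustration, here is a minimal multi-turn sketch (not part of the template). It assumes the OpenAI Agents SDK's `Agent` and `Runner` from the `agents` package plus the `AsyncDatabricksSession` described below; the session ID and agent definition are placeholders:
+
+```python
+import asyncio
+import os
+
+from agents import Agent, Runner  # OpenAI Agents SDK (assumed import path)
+from databricks_openai.agents import AsyncDatabricksSession
+
+
+async def main():
+    agent = Agent(name="memory demo", model="databricks-claude-3-7-sonnet")
+    session = AsyncDatabricksSession(
+        session_id="demo-session-1",  # any stable ID ties requests to one conversation
+        instance_name=os.environ["LAKEBASE_INSTANCE_NAME"],
+    )
+
+    # Turn 1: the session is empty, so only this message is sent to the model.
+    await Runner.run(agent, "Hello, I live in SF!", session=session)
+
+    # Turn 2: the session prepends turn 1 automatically, so the model can
+    # answer without the caller re-sending the earlier messages.
+    result = await Runner.run(agent, "What city did I say I live in?", session=session)
+    print(result.final_output)
+
+
+asyncio.run(main())
+```
+
+Reusing the same `session_id` on a later request continues the conversation; a new `session_id` starts a fresh one.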
+ +## Key Concepts + +| Concept | Description | +|---------|-------------| +| **Session** | Stores conversation history for a specific `session_id` | +| **`session_id`** | Unique identifier linking requests to the same conversation | +| **`AsyncDatabricksSession`** | Session implementation backed by Databricks Lakebase | +| **`LAKEBASE_INSTANCE_NAME`** | Environment variable specifying the Lakebase instance | + +## How This Template Uses Sessions + +### Session Creation (`agent_server/agent.py`) + +```python +from databricks_openai.agents import AsyncDatabricksSession + +session = AsyncDatabricksSession( + session_id=get_session_id(request), + instance_name=LAKEBASE_INSTANCE_NAME, +) + +result = await Runner.run(agent, messages, session=session) +``` + +### Session ID Extraction (`agent_server/agent.py`) + +The `session_id` is extracted from `custom_inputs` or auto-generated: + +```python +def get_session_id(request: ResponsesAgentRequest) -> str: + if hasattr(request, "custom_inputs") and request.custom_inputs: + if "session_id" in request.custom_inputs: + return request.custom_inputs["session_id"] + return str(uuid7()) +``` + +### Lakebase Instance Resolution (`agent_server/utils.py`) + +The `LAKEBASE_INSTANCE_NAME` env var can be either an instance name or a hostname. The `resolve_lakebase_instance_name()` function handles both cases: + +```python +_LAKEBASE_INSTANCE_NAME_RAW = os.environ.get("LAKEBASE_INSTANCE_NAME") +LAKEBASE_INSTANCE_NAME = resolve_lakebase_instance_name(_LAKEBASE_INSTANCE_NAME_RAW) +``` + +--- + +## Prerequisites + +1. **Dependency**: `databricks-openai[memory]` must be in `pyproject.toml` (already included) + +2. **Lakebase instance**: You need a Databricks Lakebase instance. See the **lakebase-setup** skill for creating and configuring one. + +3. **Environment variable**: Set `LAKEBASE_INSTANCE_NAME` in your `.env` file: + ```bash + LAKEBASE_INSTANCE_NAME= + ``` + +--- + +## Configuration Files + +### databricks.yml (Lakebase Resource) + +Add the Lakebase database resource to your app: + +```yaml +resources: + apps: + agent_openai_agents_sdk_short_term_memory: + name: "your-app-name" + source_code_path: ./ + + resources: + # ... other resources (experiment, etc.) ... + + # Lakebase instance for session storage + - name: 'database' + database: + instance_name: '' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +### databricks.yml config block (Environment Variables) + +The `LAKEBASE_INSTANCE_NAME` env var is resolved from the database resource at deploy time. 
Add to your app's `config.env` in `databricks.yml`: + +```yaml + config: + env: + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" +``` + +### .env (Local Development) + +```bash +LAKEBASE_INSTANCE_NAME= +``` + +--- + +## Testing Sessions + +### Test Multi-Turn Conversation Locally + +```bash +# Start the server +uv run start-app + +# First message - starts a new session +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hello, I live in SF!"}]}' + +# Note the session_id from custom_outputs in the response + +# Second message - continues the same session +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What city did I say I live in?"}], + "custom_inputs": {"session_id": ""} + }' +``` + +### Test Streaming + +```bash +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "Hello!"}], + "stream": true + }' +``` + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| **"LAKEBASE_INSTANCE_NAME environment variable is required"** | Missing env var | Set `LAKEBASE_INSTANCE_NAME` in `.env` | +| **SSL connection closed unexpectedly** | Network/instance issue | Verify Lakebase instance is running: `databricks lakebase instances get ` | +| **Agent doesn't remember previous messages** | Different session_id | Pass the same `session_id` via `custom_inputs` across requests | +| **"Unable to resolve hostname"** | Hostname doesn't match any instance | Verify the hostname or use the instance name directly | +| **Permission denied** | Missing Lakebase access | Add `database` resource to `databricks.yml` with `CAN_CONNECT_AND_CREATE` | + +--- + +## Next Steps + +- Configure Lakebase: see **lakebase-setup** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/deploy/SKILL.md b/agent-supervisor-api/.claude/skills/deploy/SKILL.md new file mode 100644 index 00000000..6c6f40e0 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/deploy/SKILL.md @@ -0,0 +1,232 @@ +--- +name: deploy +description: "Deploy agent to Databricks Apps using DAB (Databricks Asset Bundles). Use when: (1) User says 'deploy', 'push to databricks', or 'bundle deploy', (2) 'App already exists' error occurs, (3) Need to bind/unbind existing apps, (4) Debugging deployed apps, (5) Querying deployed app endpoints." +--- + +# Deploy to Databricks Apps + +## Profile Configuration + +**IMPORTANT:** Before running any `databricks` CLI command, read the `.env` file to get the `DATABRICKS_CONFIG_PROFILE` value. All commands must include the profile: + +```bash +databricks --profile +``` + +For example, if `.env` has `DATABRICKS_CONFIG_PROFILE=dev`, run `databricks bundle deploy --profile dev`. Without this, the CLI may target the wrong workspace. + +## App Naming Convention + +Unless the user specifies a different name, apps should use the prefix `agent-*`: +- `agent-data-analyst` +- `agent-customer-support` +- `agent-code-helper` + +Update the app name in `databricks.yml`: +```yaml +resources: + apps: + agent_supervisor_api: + name: "agent-your-app-name" # Use agent-* prefix +``` + +## Deploy Commands + +**IMPORTANT:** Always run BOTH commands to deploy and start your app: + +```bash +# 1. 
Validate bundle configuration (catches errors before deploy) +databricks bundle validate + +# 2. Deploy the bundle (creates/updates resources, uploads files) +databricks bundle deploy + +# 3. Run the app (starts/restarts with uploaded source code) - REQUIRED! +databricks bundle run agent_supervisor_api +``` + +> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code! + +The resource key `agent_supervisor_api` matches the app name in `databricks.yml` under `resources.apps`. + +## Handling "App Already Exists" Error + +If `databricks bundle deploy` fails with: +``` +Error: failed to create app +Failed to create app . An app with the same name already exists. +``` + +**Ask the user:** "Would you like to bind the existing app to this bundle, or delete it and create a new one?" + +### Option 1: Bind Existing App (Recommended) + +**Step 1:** Get the existing app's full configuration: +```bash +# Get app config including budget_policy_id and other server-side settings +databricks apps get --output json | jq '{name, budget_policy_id, description}' +``` + +**Step 2:** Update `databricks.yml` to match the existing app's configuration exactly: +```yaml +resources: + apps: + agent_supervisor_api: + name: "existing-app-name" # Must match exactly + budget_policy_id: "xxx-xxx-xxx" # Copy from step 1 if present +``` + +> **Why this matters:** Existing apps may have server-side configuration (like `budget_policy_id`) that isn't in your bundle. If these don't match, Terraform will fail with "Provider produced inconsistent result after apply". Always sync the app's current config to `databricks.yml` before binding. + +**Step 3:** If deploying to a `mode: production` target, set `workspace.root_path`: +```yaml +targets: + prod: + mode: production + workspace: + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} +``` + +> **Why this matters:** Production mode requires an explicit root path to ensure only one copy of the bundle is deployed. Without this, the deploy will fail with a recommendation to set `workspace.root_path`. + +**Step 4:** Check if already bound, then bind if needed: +```bash +# Check if resource is already managed by this bundle +databricks bundle summary --output json | jq '.resources.apps' + +# If the app appears in the summary, skip binding and go to Step 5 +# If NOT in summary, bind the resource: +databricks bundle deployment bind agent_supervisor_api --auto-approve +``` + +> **Note:** If bind fails with "Resource already managed by Terraform", the app is already bound to this bundle. Skip to Step 5 and deploy directly. + +**Step 5:** Deploy: +```bash +databricks bundle deploy +databricks bundle run agent_supervisor_api +``` + +### Option 2: Delete and Recreate + +```bash +databricks apps delete +databricks bundle deploy +``` + +**Warning:** This permanently deletes the app's URL, OAuth credentials, and service principal. + +## Unbinding an App + +To remove the link between bundle and deployed app: + +```bash +databricks bundle deployment unbind agent_supervisor_api +``` + +Use when: +- Switching to a different app +- Letting bundle create a new app +- Switching between deployed instances + +Note: Unbinding doesn't delete the deployed app. + +## Query Deployed App + +> **IMPORTANT:** Databricks Apps are **only** queryable via OAuth token. 
You **cannot** use a Personal Access Token (PAT) to query your agent. Attempting to use a PAT will result in a 302 redirect error.
+
+**Get OAuth token:**
+```bash
+databricks auth token | jq -r '.access_token'
+```
+
+**Send request:**
+```bash
+curl -X POST <app-url>/invocations \
+  -H "Authorization: Bearer <oauth-token>" \
+  -H "Content-Type: application/json" \
+  -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }'
+```
+
+**If using memory** - include `user_id` to scope memories per user:
+```bash
+curl -X POST <app-url>/invocations \
+  -H "Authorization: Bearer <oauth-token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": [{"role": "user", "content": "What do you remember about me?"}],
+    "custom_inputs": {"user_id": "user@example.com"}
+  }'
+```
+
+## On-Behalf-Of (OBO) User Authentication
+
+To authenticate as the requesting user instead of the app service principal:
+
+```python
+from agent_server.utils import get_user_workspace_client
+
+# In your agent code
+user_client = get_user_workspace_client()
+# Use user_client for operations that should run as the user
+```
+
+This is useful when you want the agent to access resources with the user's permissions rather than the app's service principal permissions.
+
+See: [OBO authentication documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#retrieve-user-authorization-credentials)
+
+## Debug Deployed Apps
+
+```bash
+# View logs (follow mode)
+databricks apps logs <app-name> --follow
+
+# Check app status
+databricks apps get <app-name> --output json | jq '{app_status, compute_status}'
+
+# Get app URL
+databricks apps get <app-name> --output json | jq -r '.url'
+```
+
+## Important Notes
+
+- **App naming convention**: App names must be prefixed with `agent-` (e.g., `agent-my-assistant`, `agent-data-analyst`)
+- **Name is immutable**: Changing the `name` field in `databricks.yml` forces app replacement (destroy + create)
+- **Remote Terraform state**: Databricks stores state remotely; the same app is detected across directories
+- **Review the plan**: Look for `# forces replacement` in the Terraform output before confirming
+
+## FAQ
+
+**Q: I see a 200 OK in the logs, but get an error in the actual stream. What's going on?**
+
+This is expected behavior. The initial 200 OK confirms stream setup was successful. Errors that occur during streaming don't affect the initial HTTP status code. Check the stream content for the actual error message.
+
+**Q: When querying my agent, I get a 302 redirect error. What's wrong?**
+
+You're likely using a Personal Access Token (PAT). Databricks Apps only support OAuth tokens.
Generate one with: +```bash +databricks auth token +``` + +**Q: How do I add dependencies to my agent?** + +Use `uv add`: +```bash +uv add +# Example: uv add "mlflow-skinny[databricks]" +``` + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| Validation errors | Run `databricks bundle validate` to see detailed errors before deploying | +| Permission errors at runtime | Grant resources in `databricks.yml` (see **add-tools** skill) | +| Lakebase access errors | See **lakebase-setup** skill for permissions (if using memory) | +| App not starting | Check `databricks apps logs ` | +| Auth token expired | Run `databricks auth token` again | +| 302 redirect error | Use OAuth token, not PAT | +| "Provider produced inconsistent result" | Sync app config to `databricks.yml` | +| "should set workspace.root_path" | Add `root_path` to production target | +| App running old code after deploy | Run `databricks bundle run agent_supervisor_api` after deploy | +| Env var is None in deployed app | Check `value_from` in databricks.yml `config.env` matches resource `name` | diff --git a/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md b/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md new file mode 100644 index 00000000..87c3f519 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md @@ -0,0 +1,47 @@ +--- +name: discover-tools +description: "Discover available tools and resources in Databricks workspace. Use when: (1) User asks 'what tools are available', (2) Before writing agent code, (3) Looking for MCP servers, Genie spaces, UC functions, or vector search indexes, (4) User says 'discover', 'find resources', or 'what can I connect to'." +--- + +# Discover Available Tools + +**Run tool discovery BEFORE writing agent code** to understand what resources are available in the workspace. + +## Run Discovery + +```bash +uv run discover-tools +``` + +**Options:** +```bash +# Limit to specific catalog/schema +uv run discover-tools --catalog my_catalog --schema my_schema + +# Output as JSON +uv run discover-tools --format json --output tools.json + +# Save markdown report +uv run discover-tools --output tools.md + +# Use specific Databricks profile +uv run discover-tools --profile DEFAULT +``` + +## What Gets Discovered + +| Resource Type | Description | MCP URL Pattern | +|--------------|-------------|-----------------| +| **UC Functions** | SQL UDFs as agent tools | `{host}/api/2.0/mcp/functions/{catalog}/{schema}` | +| **UC Tables** | Structured data for querying | (via UC functions) | +| **Vector Search Indexes** | RAG applications | `{host}/api/2.0/mcp/vector-search/{catalog}/{schema}` | +| **Genie Spaces** | Natural language data interface | `{host}/api/2.0/mcp/genie/{space_id}` | +| **Custom MCP Servers** | Apps starting with `mcp-*` | `{app_url}/mcp` | +| **External MCP Servers** | Via UC connections | `{host}/api/2.0/mcp/external/{connection_name}` | + +## Next Steps + +After discovering tools: +1. **Add MCP servers to your agent** - See **modify-agent** skill for SDK-specific code examples +2. **Grant permissions** in `databricks.yml` - See **add-tools** skill for YAML snippets +3. 
**Test locally** with `uv run start-app` - See **run-locally** skill diff --git a/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md b/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md new file mode 100644 index 00000000..2dfbc9c3 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md @@ -0,0 +1,392 @@ +--- +name: lakebase-setup +description: "Configure Lakebase for agent memory storage. Use when: (1) Adding memory capabilities to the agent, (2) 'Failed to connect to Lakebase' errors, (3) Permission errors on checkpoint/store tables, (4) User says 'lakebase', 'memory setup', or 'add memory'." +--- + +# Lakebase Setup for Agent Persistence + +> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks --profile ` or `DATABRICKS_CONFIG_PROFILE= databricks ` + +## Overview + +Lakebase provides persistent PostgreSQL storage for agents: +- **Short-term memory** (LangGraph): Conversation history within a thread (`AsyncCheckpointSaver`) +- **Long-term memory** (LangGraph): User facts across sessions (`AsyncDatabricksStore`) +- **Long-running agent persistence** (OpenAI SDK): Background task state via custom SQLAlchemy tables (`agent_server` schema) + +> **Note:** For pre-configured memory templates, see: +> - `agent-langgraph-short-term-memory` - Conversation history within a session +> - `agent-langgraph-long-term-memory` - User facts that persist across sessions +> - `agent-openai-agents-sdk-long-running-agent` - Background tasks with Lakebase persistence + +## Complete Setup Workflow + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 1. Add dependency → 2. Get instance → 3. Configure DAB │ +│ 4. Configure .env → 5. Initialize tables → 6. Deploy + Run │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Step 1: Add Memory Dependency + +Add the memory extra to your `pyproject.toml`: + +```toml +dependencies = [ + "databricks-langchain[memory]", + # ... other dependencies +] +``` + +Then sync dependencies: +```bash +uv sync +``` + +--- + +## Step 2: Create or Get Lakebase Instance + +### Option A: Create New Instance (via Databricks UI) + +1. Go to your Databricks workspace +2. Navigate to **Compute** → **Lakebase** +3. Click **Create Instance** +4. Note the instance name + +### Option B: Use Existing Instance + +If you have an existing instance, note its name for the next step. + +--- + +## Step 3: Configure databricks.yml (Lakebase Resource) + +Add the Lakebase `database` resource to your app in `databricks.yml`: + +```yaml +resources: + apps: + agent_langgraph: + name: "your-app-name" + source_code_path: ./ + + resources: + # ... other resources (experiment, UC functions, etc.) ... + + # Lakebase instance for long-term memory + - name: 'database' + database: + instance_name: '' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +**Important:** +- The `instance_name: ''` must match the actual Lakebase instance name +- Using the `database` resource type automatically grants the app's service principal access to Lakebase + +### Add Environment Variables to databricks.yml config block + +Add the Lakebase environment variables to the `config.env` section of your app in `databricks.yml`: + +```yaml + config: + command: ["uv", "run", "start-app"] + env: + # ... other env vars ... 
+ + # Lakebase instance name - resolved from database resource at deploy time + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" + + # Static values for embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" +``` + +**Important:** +- The `LAKEBASE_INSTANCE_NAME` uses `value_from: "database"` which resolves from the `database` resource at deploy time +- The `database` resource handles permissions; the `config.env` provides the instance name to your code + +--- + +## Step 4: Configure .env (Local Development) + +For local development, add to `.env`: + +```bash +# Lakebase configuration for long-term memory +LAKEBASE_INSTANCE_NAME= +EMBEDDING_ENDPOINT=databricks-gte-large-en +EMBEDDING_DIMS=1024 +``` + +**Important:** `embedding_dims` must match the embedding endpoint: + +| Endpoint | Dimensions | +|----------|------------| +| `databricks-gte-large-en` | 1024 | +| `databricks-bge-large-en` | 1024 | + +> **Note:** `.env` is only for local development. When deployed, the app gets `LAKEBASE_INSTANCE_NAME` from the `value_from` reference in the `databricks.yml` config block. + +--- + +## Step 5: Initialize Tables + +### Option A: LangGraph Memory Templates (public schema) + +**Before deploying**, initialize the Lakebase tables. The `AsyncDatabricksStore` creates tables on first use, but you need to do this locally first: + +```bash +DATABRICKS_CONFIG_PROFILE= uv run python -c "$(cat <<'EOF' +import asyncio +from databricks_langchain import AsyncDatabricksStore + +async def setup(): + async with AsyncDatabricksStore( + instance_name="", + embedding_endpoint="databricks-gte-large-en", + embedding_dims=1024, + ) as store: + await store.setup() + print("Tables created!") + +asyncio.run(setup()) +EOF +)" +``` + +This creates these tables in the `public` schema: +- `store` - Key-value storage for memories +- `store_vectors` - Vector embeddings for semantic search +- `store_migrations` - Schema migration tracking +- `vector_migrations` - Vector schema migration tracking + +### Option B: Long-Running Agent Templates (agent_server schema) + +The long-running agent uses SQLAlchemy with a custom `agent_server` schema. Tables are created automatically on app startup via `CREATE SCHEMA IF NOT EXISTS agent_server` and `Base.metadata.create_all`. No manual table initialization is needed. + +Tables created in the `agent_server` schema: +- `responses` - Response status tracking for background agent tasks +- `messages` - Stream events and output items for responses + +--- + +## Step 6: Grant SP Permissions (CRITICAL for deployed apps) + +After deploying, the app's service principal needs Postgres roles to access Lakebase tables. The DAB `database` resource with `CAN_CONNECT_AND_CREATE` grants basic connectivity, but you must also grant Postgres-level schema and table permissions. 
+
+**Step 1:** Get the app's service principal client ID:
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq -r '.service_principal_client_id'
+```
+
+**Step 2:** Grant permissions using `LakebaseClient`:
+
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> uv run python -c "
+from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege
+
+client = LakebaseClient(instance_name='<instance-name>')
+sp_id = '<service-principal-client-id>'  # UUID from step 1
+
+# Create role (must do first)
+client.create_role(sp_id, 'SERVICE_PRINCIPAL')
+
+# Grant schema privileges
+client.grant_schema(
+    grantee=sp_id,
+    schemas=['<schema>'],  # 'public' for LangGraph, 'agent_server' for long-running agent
+    privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE],
+)
+
+# Grant table privileges
+client.grant_table(
+    grantee=sp_id,
+    tables=['<schema>.<table>', '<schema>.<table>'],
+    privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE],
+)
+
+print('Done!')
+"
+```
+
+### LangGraph Memory Templates
+
+Grant on the `public` schema:
+```python
+client.grant_schema(grantee=sp_id, schemas=['public'], privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE])
+client.grant_table(grantee=sp_id, tables=['public.store', 'public.store_vectors'], privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE])
+```
+
+### Long-Running Agent Templates
+
+Grant on the `agent_server` schema:
+```python
+client.grant_schema(grantee=sp_id, schemas=['agent_server'], privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE])
+client.grant_table(grantee=sp_id, tables=['agent_server.responses', 'agent_server.messages'], privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE])
+```
+
+---
+
+## Step 7: Deploy and Run Your App
+
+**IMPORTANT:** Always run both `deploy` AND `run` commands:
+
+```bash
+# Deploy resources and upload files
+DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle deploy
+
+# Start/restart the app with new code (REQUIRED!)
+DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle run {{BUNDLE_NAME}}
+```
+
+> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is required to actually start the app with the new code.
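+
+To confirm the app actually restarted and is serving the new code, the same status check used in the **deploy** skill works here (substitute your app name):
+
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq '{app_status, compute_status}'
+```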
+ +--- + +## Complete Example: databricks.yml with Lakebase + +```yaml +bundle: + name: agent_langgraph + +resources: + experiments: + agent_langgraph_experiment: + name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target} + + apps: + agent_langgraph: + name: "my-agent-app" + description: "Agent with long-term memory" + source_code_path: ./ + config: + command: ["uv", "run", "start-app"] + env: + - name: MLFLOW_TRACKING_URI + value: "databricks" + - name: MLFLOW_REGISTRY_URI + value: "databricks-uc" + - name: API_PROXY + value: "http://localhost:8000/invocations" + - name: CHAT_APP_PORT + value: "3000" + - name: CHAT_PROXY_TIMEOUT_SECONDS + value: "300" + # Reference experiment resource + - name: MLFLOW_EXPERIMENT_ID + value_from: "experiment" + # Lakebase instance name (resolved from database resource) + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" + # Embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" + + resources: + - name: 'experiment' + experiment: + experiment_id: "${resources.experiments.agent_langgraph_experiment.id}" + permission: 'CAN_MANAGE' + + # Lakebase instance for long-term memory + - name: 'database' + database: + instance_name: '' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' + +targets: + dev: + mode: development + default: true +``` + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| **"embedding_dims is required when embedding_endpoint is specified"** | Missing `embedding_dims` parameter | Add `embedding_dims=1024` to AsyncDatabricksStore | +| **"relation 'store' does not exist"** | Tables not initialized | Run `await store.setup()` locally first (Step 5) | +| **"Unable to resolve Lakebase instance 'None'"** | Missing env var in deployed app | Add `LAKEBASE_INSTANCE_NAME` to databricks.yml `config.env` | +| **"Unable to resolve Lakebase instance '...database.cloud.databricks.com'"** | Used value_from instead of value | Use `value: ""` not `value_from` for Lakebase | +| **"permission denied for table store"** | Missing grants | The `database` resource in DAB should handle this; verify the resource is configured | +| **"Failed to connect to Lakebase"** | Wrong instance name | Verify instance name in databricks.yml and .env | +| **Connection pool errors on exit** | Python cleanup race | Ignore `PythonFinalizationError` - it's harmless | +| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run agent_langgraph` after deploy | +| **value_from not resolving** | Resource name mismatch | Ensure `value_from` value matches `name` in databricks.yml resources | + +--- + +## Quick Reference: LakebaseClient API + +For manual permission management (usually not needed with DAB `database` resource): + +```python +from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege + +client = LakebaseClient(instance_name="...") + +# Create role (must do first) +client.create_role(identity_name, "SERVICE_PRINCIPAL") + +# Grant schema (note: schemas is a list, grantee not role) +client.grant_schema( + grantee="...", + schemas=["public"], + privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE], +) + +# Grant tables (note: tables includes schema prefix) +client.grant_table( + grantee="...", + tables=["public.store"], + privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, ...], +) + +# Execute raw SQL +client.execute("SELECT * FROM pg_tables WHERE 
schemaname = 'public'") +``` + +### Service Principal Identifiers + +When granting permissions manually, note that Databricks apps have multiple identifiers: + +| Field | Format | Example | +|-------|--------|---------| +| `service_principal_id` | Numeric ID | `1234567890123456` | +| `service_principal_client_id` | UUID | `a1b2c3d4-e5f6-7890-abcd-ef1234567890` | +| `service_principal_name` | String name | `my-app-service-principal` | + +**Get all identifiers:** +```bash +DATABRICKS_CONFIG_PROFILE= databricks apps get --output json | jq '{ + id: .service_principal_id, + client_id: .service_principal_client_id, + name: .service_principal_name +}' +``` + +**Which to use:** +- `LakebaseClient.create_role()` - Use `service_principal_client_id` (UUID) or `service_principal_name` +- Raw SQL grants - Use `service_principal_client_id` (UUID) + +--- + +## Next Steps + +- Add memory to agent code: see **agent-memory** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md b/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md new file mode 100644 index 00000000..4287b4af --- /dev/null +++ b/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md @@ -0,0 +1,965 @@ +--- +name: migrate-from-model-serving +description: "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps. Use when: (1) User wants to migrate from Model Serving to Apps, (2) User has a ResponsesAgent with predict()/predict_stream() methods, (3) User wants to convert to @invoke/@stream decorators." +--- + +# Model Serving to Databricks Apps Migration Guide + +This guide instructs LLM coding agents how to migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps. + +--- + +## Overview + +**Goal:** Migrate an agent deployed on Databricks Model Serving (using `ResponsesAgent` with `predict()`/`predict_stream()`) to Databricks Apps (using MLflow GenAI Server with `@invoke`/`@stream` decorators). + +**Key Transformation:** +- Model Serving: Synchronous `predict()` and `predict_stream()` methods on a class +- Apps: Functions with `@invoke` and `@stream` decorators (sync or async, based on user preference) + +**Deliverables:** After migration is complete, you will have: + +``` +/ +├── original_mlflow_model/ # Downloaded artifacts from Model Serving +│ ├── MLmodel +│ ├── code/ +│ │ └── agent.py +│ ├── input_example.json +│ └── requirements.txt +│ +└── / # New Databricks App (ready to deploy) + ├── agent_server/ + │ ├── agent.py # Migrated agent code + │ └── ... + ├── databricks.yml # Bundle config with resources + ├── pyproject.toml + ├── requirements.txt + └── ... +``` + +> **``** is the name the user provides at the start of the migration. It is used as both the directory name and the Databricks App name at deploy time. + +--- + +## Before You Begin: Gather User Inputs + +**Before doing anything else, ask the user three questions.** Use the `AskUserQuestion` tool to collect all answers at once so the user is only prompted once, then Claude can execute the rest of the migration autonomously. + +**Questions to ask:** + +1. **Databricks profile:** Which Databricks CLI profile should be used for the workspace where the Model Serving endpoint lives? (Run `databricks auth profiles` first to list available profiles and their workspaces, then present the options to the user.) +2. **App name:** What should the new Databricks App be named? 
(Must be lowercase, can contain letters, numbers, and hyphens, and must be unique within the workspace.)
+3. **Async migration:** Would you like to migrate your agent code to be fully async?
+   - **Yes (Recommended):** Converts all I/O operations to async (`await`/`async for`), enabling higher concurrency on smaller compute — no more threads sitting idle while waiting for LLM responses or long-running tool calls.
+   - **No:** Keeps your existing synchronous code with minimal changes — just extracts the logic from the `ResponsesAgent` class and wraps it with `@invoke`/`@stream` decorators. Simpler migration, but each request blocks a thread while waiting for I/O.
+
+Store the answers as:
+- `<profile>` — used for ALL `databricks` CLI commands throughout the migration (via `--profile <profile>`)
+- `<app-name>` — used as both the directory name for the migrated app AND the app name when deploying with `databricks bundle deploy`
+- `<migrate-to-async>` — `yes` or `no`, determines whether to convert the agent code to async or keep it synchronous
+
+### Validate Authentication
+
+After receiving the user's answers, validate the selected profile:
+
+```bash
+databricks current-user me --profile <profile>
+```
+
+If this fails with an authentication error, prompt the user to re-authenticate:
+
+```bash
+databricks auth login --profile <profile>
+```
+
+> **Important:** Remember to include `--profile <profile>` on every `databricks` CLI command throughout the migration.
+
+### Create the App Directory
+
+Copy all scaffold files from the current working directory into a new directory named `<app-name>/`. Exclude instruction files (`AGENTS.md`, `CLAUDE.md`), hidden directories (`.claude/`, `.git/`), and any migration artifacts (e.g., `original_mlflow_model/`, `.migration-venv/`). Do NOT search for or copy scaffold files from other directories or templates — everything you need is right here.
+
+All subsequent migration steps operate inside the `<app-name>/` directory.
+
+> **Note:** The `agent_server/agent.py` scaffold is intentionally framework-agnostic — it contains the `@invoke`/`@stream` decorator pattern with TODO placeholders. Step 3 (Migrate the Agent Code) will replace these placeholders with the actual agent logic from the original Model Serving endpoint.
+
+### Create Task List
+
+**Create a task list to track progress.** This helps the user follow along and see what's completed, in progress, and pending.
+
+> **User tip:** Press `Ctrl+T` to toggle the task list view in your terminal. The display shows up to 10 tasks at a time with status indicators.
+ +Create the following tasks using the `TaskCreate` tool: + +| Task | Description | +|------|-------------| +| **Authenticate to Databricks** | Verify Databricks CLI authentication and validate the selected profile | +| **Download original agent artifacts** | Download the MLflow model artifacts from Model Serving endpoint | +| **Analyze and understand agent code** | Examine the original agent code, identify tools, resources, and dependencies | +| **Migrate agent code to Apps format** | Transform ResponsesAgent class to @invoke/@stream decorated functions | +| **Set up and configure the app** | Install dependencies, run quickstart, configure environment | +| **Test agent locally** | Start local server and verify the agent works correctly | +| **Deploy to Databricks Apps** | Configure databricks.yml resources and deploy with Databricks Asset Bundles | +| **Test deployed app** | Verify the deployed app responds correctly | + +Update task status as you progress: +- Mark tasks as `in_progress` when starting each step +- Mark tasks as `completed` when finished +- This gives the user visibility into migration progress + +--- + +## Step 1: Download the Original Agent Code + +> **Task:** Mark "Authenticate to Databricks" as `completed`. Mark "Download original agent artifacts" as `in_progress`. +> +> **Note:** The `` and `` values were collected from the user in the "Before You Begin" section. Use them throughout. + +Download the original agent code from the Model Serving endpoint. This requires setting up a virtual environment with MLflow to access the model artifacts. + +### 1.1 Get Model Info from Endpoint + +If you have a serving endpoint name, extract the model details: + +```bash +# Get endpoint info (remember to include --profile if using non-default) +databricks serving-endpoints get --profile --output json +``` + +Look for `served_entities[0].entity_name` (model name) and `entity_version` in the response. Find the entity with 100% traffic in `traffic_config.routes`. + +### 1.2 Download Model Artifacts + +Use `uv run --with` to download artifacts without creating a separate virtual environment. The `mlflow[databricks]` extra includes `boto3` for Unity Catalog artifact access: + +```bash +DATABRICKS_CONFIG_PROFILE= uv run --no-project \ + --with "mlflow[databricks]>=2.15.0" \ + --with "databricks-sdk>=0.30.0" \ + python3 << 'EOF' +import mlflow + +mlflow.set_tracking_uri("databricks") + +# Replace with actual values from step 1.1 +MODEL_NAME = "" +VERSION = "" + +print(f"Downloading model: models:/{MODEL_NAME}/{VERSION}") +mlflow.artifacts.download_artifacts( + artifact_uri=f"models:/{MODEL_NAME}/{VERSION}", + dst_path="./original_mlflow_model" +) +print("Download complete! 
Artifacts saved to ./original_mlflow_model") +EOF +``` + +### 1.3 Verify Downloaded Artifacts + +Check that the key files exist and understand the full structure: + +```bash +# List all downloaded files recursively +find ./original_mlflow_model -type f | head -50 + +# Check for MLmodel file (contains resource requirements) +cat ./original_mlflow_model/MLmodel + +# Check for input example (useful for testing) +cat ./original_mlflow_model/input_example.json 2>/dev/null +``` + +**Examine the `/code` folder** - contains all code dependencies logged via `code_paths=["..."]`: + +```bash +# List all code files +ls -la ./original_mlflow_model/code/ + +# The main agent is typically agent.py, but there may be additional modules +find ./original_mlflow_model/code -name "*.py" -type f +``` + +**Examine the `/artifacts` folder** (if present) - contains artifacts logged via `artifacts={...}`: + +```bash +# Check for artifacts folder +ls -la ./original_mlflow_model/artifacts/ 2>/dev/null + +# List all artifacts +find ./original_mlflow_model/artifacts -type f 2>/dev/null +``` + +> **Important:** Take note of ALL files in `/code` and `/artifacts`. You will need to copy these to the migrated app and ensure imports still work correctly. + +### Expected Output Structure + +After successful download, you should have: + +``` +./original_mlflow_model/ +├── MLmodel # Model metadata and resource requirements +├── code/ # Code logged via code_paths=["..."] +│ ├── agent.py # Main agent implementation +│ ├── utils.py # (optional) Helper modules +│ ├── tools.py # (optional) Custom tool definitions +│ └── ... # Any other code dependencies +├── artifacts/ # (optional) Artifacts logged via artifacts={...} +│ ├── config.yaml # (optional) Configuration files +│ ├── prompts/ # (optional) Prompt templates +│ └── ... # Any other artifacts (data files, etc.) +├── input_example.json # Sample request for testing +├── requirements.txt # Original dependencies +└── ... +``` + +### Key Files to Examine + +1. **`code/agent.py`** - Contains the `ResponsesAgent` class with `predict()` and `predict_stream()` methods +2. **`code/*.py`** - Any additional Python modules the agent imports +3. **`MLmodel`** - Contains the `resources` section listing required Databricks resources +4. **`artifacts/`** - Any configuration files, prompts, or data files the agent uses +5. **`input_example.json`** - Use this to test the migrated agent + +### Troubleshooting Model Download + +**"Unable to import necessary dependencies to access model version files in Unity Catalog"** +This means `boto3` is missing. Ensure you're using `mlflow[databricks]` (not just `mlflow`) in the `--with` flag — the `[databricks]` extra includes `boto3`. + +**"INVALID_PARAMETER_VALUE" or authentication errors** +Re-authenticate with Databricks (include profile if non-default): +```bash +databricks auth login --profile +``` + +**Wrong workspace / Model not found** +Make sure you're using the correct profile that corresponds to the workspace where the model is deployed: +```bash +# List profiles to see which workspace each points to +databricks auth profiles + +# Verify you can access the workspace +databricks current-user me --profile + +# List models in that workspace +databricks registered-models list --profile +databricks model-versions list --name "" --profile +``` + +--- + +## Step 2: Understand the Key Transformations + +> **Task:** Mark "Download original agent artifacts" as `completed`. Mark "Analyze and understand agent code" as `in_progress`. 
+ +### Entry Point Transformation + +In both cases, the `ResponsesAgent` class is replaced with decorated functions. The difference is whether those functions are async or sync. + +**Model Serving (OLD):** +```python +from mlflow.pyfunc import ResponsesAgent, ResponsesAgentRequest, ResponsesAgentResponse + +class MyAgent(ResponsesAgent): + def predict(self, request: ResponsesAgentRequest, params=None) -> ResponsesAgentResponse: + # Synchronous implementation + ... + return ResponsesAgentResponse(output=outputs) + + def predict_stream(self, request: ResponsesAgentRequest, params=None): + # Synchronous generator + for chunk in ...: + yield ResponsesAgentStreamEvent(...) +``` + +**Apps — Async (if `` = yes):** +```python +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, + ResponsesAgentStreamEvent, +) + +@invoke() +async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + # Async implementation - typically calls streaming() and collects results + outputs = [ + event.item + async for event in streaming(request) + if event.type == "response.output_item.done" + ] + return ResponsesAgentResponse(output=outputs) + +@stream() +async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: + # Async generator + async for event in ...: + yield event +``` + +**Apps — Sync (if `` = no):** +```python +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, + ResponsesAgentStreamEvent, +) + +@invoke() +def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + # Same sync logic from original predict(), extracted from the class + ... + return ResponsesAgentResponse(output=outputs) + +@stream() +def streaming(request: ResponsesAgentRequest): + # Same sync generator from original predict_stream(), extracted from the class + for chunk in ...: + yield ResponsesAgentStreamEvent(...) +``` + +### Key Differences + +| Aspect | Model Serving | Apps (async) | Apps (sync) | +|--------|--------------|------|------| +| Structure | `class MyAgent(ResponsesAgent)` | Decorated functions | Decorated functions | +| Functions | `def predict()` / `def predict_stream()` | `async def` with `await` | `def` (same as original) | +| Streaming | Sync generator (`yield`) | Async generator (`async for` / `yield`) | Sync generator (`yield`) | +| Server | MLflow Model Server | MLflow GenAI Server (FastAPI) | MLflow GenAI Server (FastAPI) | +| Deployment | `databricks_agents.deploy()` | `databricks bundle deploy` + `bundle run` | `databricks bundle deploy` + `bundle run` | + +### Async Patterns (only if `` = yes) + +> **Skip this section if the user chose synchronous migration.** The sync path keeps all original I/O calls as-is. + +All I/O operations must be converted to async: + +```python +# OLD (sync) +response = client.chat(messages) + +# NEW (async) +response = await client.achat(messages) + +# OLD (sync iteration) +for chunk in stream: + yield chunk + +# NEW (async iteration) +async for chunk in stream: + yield chunk +``` + +--- + +## Step 3: Migrate the Agent Code + +> **Task:** Mark "Analyze and understand agent code" as `completed`. Mark "Migrate agent code to Apps format" as `in_progress`. + +### 3.1 Copy Code Dependencies and Artifacts + +The original MLflow model may contain multiple code files and artifacts that need to be migrated. 
+ +**Copy all code files from `/code` to `agent_server/`:** + +```bash +# Copy all Python files from original code folder +cp ./original_mlflow_model/code/*.py .//agent_server/ + +# If there are subdirectories with code, copy those too +# cp -r ./original_mlflow_model/code/submodule .//agent_server/ +``` + +**Copy artifacts (if present):** + +```bash +# Create an artifacts directory in the migrated app if needed +mkdir -p .//agent_server/artifacts + +# Copy all artifacts +cp -r ./original_mlflow_model/artifacts/* .//agent_server/artifacts/ 2>/dev/null || true +``` + +**Fix import paths after copying:** + +When code files are moved, imports may break. Check and update imports in all copied files: + +```python +# BEFORE (if files were in different locations): +from code.utils import helper_function +from artifacts.prompts import SYSTEM_PROMPT + +# AFTER (files are now in agent_server/): +from agent_server.utils import helper_function +# Or if in same directory: +from .utils import helper_function + +# For artifacts, update file paths: +# BEFORE: +with open("artifacts/config.yaml") as f: +# AFTER: +import os +config_path = os.path.join(os.path.dirname(__file__), "artifacts", "config.yaml") +with open(config_path) as f: +``` + +> **Important:** Review each copied file and ensure all imports resolve correctly. The most common issues are: +> - Relative imports that assumed a different directory structure +> - Hardcoded file paths to artifacts +> - Missing `__init__.py` files for package imports + +### 3.2 Extract Configuration + +From the original agent code, identify and preserve: +- **LLM endpoint name** (e.g., `databricks-claude-sonnet-4-5`) +- **System prompt** +- **Tool definitions** +- **Any custom logic** + +### 3.3 Update the Agent Entry Point + +The approach depends on whether the user chose async or sync migration. + +--- + +#### Path A: Synchronous Migration (`` = no) + +This is the minimal-changes path. Extract the logic from the `ResponsesAgent` class, wrap it with `@invoke`/`@stream` decorators, and keep all code synchronous. + +Edit `/agent_server/agent.py`: + +1. **Replace the scaffold with the original agent logic.** The core transformation is extracting the class methods into decorated functions: + +```python +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, + ResponsesAgentStreamEvent, +) + +# Move any class __init__ or class-level setup to module level +# e.g., client initialization, tool setup, etc. + +@invoke() +def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + # Paste the body of the original predict() method here + # Remove 'self.' references — replace with module-level variables + # Remove 'params' parameter (not used in Apps) + ... + return ResponsesAgentResponse(output=outputs) + +@stream() +def streaming(request: ResponsesAgentRequest): + # Paste the body of the original predict_stream() method here + # Remove 'self.' references — replace with module-level variables + # Remove 'params' parameter (not used in Apps) + for chunk in ...: + yield ResponsesAgentStreamEvent(...) +``` + +2. 
**Key changes from class to functions:** + - Remove the `class MyAgent(ResponsesAgent):` wrapper + - Remove `self` parameter from all methods + - Move `__init__` logic (client creation, tool setup) to module-level code + - Replace `self.some_attribute` with module-level variables + - Add `@invoke()` decorator to the non-streaming function + - Add `@stream()` decorator to the streaming function + +3. **Keep all other code as-is** — no need to convert sync calls to async, no need to change `for` to `async for`, no need to add `await`. + +--- + +#### Path B: Async Migration (`` = yes) + +This path converts all I/O operations to async for higher concurrency. More changes are required, but the result is a more efficient server. + +Edit `/agent_server/agent.py`: + +1. **Update the LLM endpoint:** + ```python + LLM_ENDPOINT_NAME = "" + ``` + +2. **Update the system prompt:** + ```python + SYSTEM_PROMPT = """""" + ``` + +3. **Add your custom tools:** + If your original agent had custom tools, add them: + ```python + from langchain_core.tools import tool + + @tool + async def my_custom_tool(arg: str) -> str: + """Tool description.""" + # Your tool logic (make async if needed) + return result + ``` + +4. **Convert all I/O to async:** + - `def predict()` → `async def non_streaming()` + - `def predict_stream()` → `async def streaming()` + - `client.chat()` → `await client.achat()` + - `for chunk in stream:` → `async for chunk in stream:` + - Sync HTTP calls → `await` async equivalents + +5. **Preserve any special logic:** + Migrate any custom preprocessing, postprocessing, or business logic from the original agent. + +--- + +### 3.4 Handle Stateful Agents + +**If original uses checkpointer (short-term memory):** +- Add checkpointer with Lakebase integration (use `AsyncCheckpointSaver` if async, or sync equivalent if sync) +- Configure `LAKEBASE_INSTANCE_NAME` in `.env` +- Extract thread_id from `request.custom_inputs` or `request.context.conversation_id` + +**If original uses store (long-term memory):** +- Add store with Lakebase integration (use `AsyncDatabricksStore` if async, or sync equivalent if sync) +- Configure `LAKEBASE_INSTANCE_NAME` in `.env` +- Extract user_id from `request.custom_inputs` or `request.context.user_id` + +--- + +## Step 4: Set Up the App + +> **Task:** Mark "Migrate agent code to Apps format" as `completed`. Mark "Set up and configure the app" as `in_progress`. + +### 4.1 Verify Build Configuration + +Before installing dependencies, ensure a README file exists (hatchling requires this): + +**Ensure a README file exists:** + +```bash +# Create a minimal README if one doesn't exist +if [ ! -f "README.md" ]; then + echo "# Migrated Agent App" > README.md +fi +``` + +### 4.2 Install Dependencies + +```bash +cd +uv sync +``` + +### 4.3 Create requirements.txt for Databricks Apps + +Databricks Apps requires a `requirements.txt` file with `uv` to install dependencies from `pyproject.toml`: + +```bash +echo "uv" > requirements.txt +``` + +### 4.4 Run Quickstart + +Run the `uv run quickstart` script to quickly set up your local environment. This is the **recommended** way to configure the app as it handles all necessary setup automatically. + +```bash +uv run quickstart +``` + +This script will: + +1. Verify uv, nvm, and Databricks CLI installations +2. Configure Databricks authentication +3. Configure agent tracing, by creating and linking an MLflow experiment to your app +4. 
Configure `.env` with the necessary environment variables + +> **Important:** The quickstart script creates the MLflow experiment that the app needs for logging traces and models. This experiment will be added as a resource when deploying the app. + +If there are issues with the quickstart script, refer to the manual setup in section 4.5. + +### 4.5 Manual Environment Configuration (Optional) + +If you need to manually configure the environment or add additional variables, edit `.env`: + +```bash +# Databricks authentication +DATABRICKS_CONFIG_PROFILE= + +# MLflow experiment (created by quickstart, or create manually) +MLFLOW_EXPERIMENT_ID= + +# Example: Lakebase for stateful agents +LAKEBASE_INSTANCE_NAME= + +# Example: Custom API keys +MY_API_KEY= +``` + +To manually create an MLflow experiment: + +```bash +databricks experiments create-experiment "/Users//" --profile +``` + +--- + +## Step 5: Test Locally + +> **Task:** Mark "Set up and configure the app" as `completed`. Mark "Test agent locally" as `in_progress`. + +> Test your migrated agent locally before deploying to Databricks Apps. This helps catch configuration issues early and ensures the agent works correctly. + +### 5.1 Start the Server + +After the quickstart setup is complete, start the agent server and chat app locally: + +```bash +cd +uv run start-app +``` + +Wait for the server to start. You should see output indicating the server is running on `http://localhost:8000`. + +> **Note:** If you only need the API endpoint (without the chat UI), you can run `uv run start-server` instead. + +### 5.2 Test with Original Input Example + +The original model artifacts include an `input_example.json` file that contains a sample request. Use this to verify your migrated agent produces the same behavior. If there's no valid sample request then figure out a valid sample request to query agent based on its code. 
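+
+If the original Model Serving endpoint is still deployed, you can optionally capture its response to the same example and compare it with the local server's output from the commands below. A rough sketch; `DATABRICKS_HOST`, `YOUR_PROFILE`, and `ORIGINAL_ENDPOINT_NAME` are placeholders for your workspace URL, CLI profile, and the original endpoint name:
+
+```bash
+# Capture the original endpoint's answer for the same input example
+TOKEN=$(databricks auth token --profile YOUR_PROFILE | jq -r .access_token)
+curl -s -X POST "${DATABRICKS_HOST}/serving-endpoints/ORIGINAL_ENDPOINT_NAME/invocations" \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d "$(cat ../original_mlflow_model/input_example.json)" > /tmp/original_response.json
+```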
+ +```bash +# Check the original input example (from the directory) +cat ../original_mlflow_model/input_example.json +``` + +Example content: +```json +{"input": [{"role": "user", "content": "What is an LLM agent?"}], "custom_inputs": {"thread_id": "example-thread-123"}} +``` + +Test your local server with this input: + +```bash +# Test with the original input example +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d "$(cat ../original_mlflow_model/input_example.json)" +``` + +### 5.3 Test Basic Requests + +```bash +# Non-streaming +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hello!"}]}' + +# Streaming +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hello!"}], "stream": true}' +``` + +### 5.4 Test with Custom Inputs (for stateful agents) + +```bash +# With thread_id for short-term memory +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"thread_id": "test-123"}}' + +# With user_id for long-term memory +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"user_id": "user@example.com"}}' +``` + +### 5.5 Verify Before Proceeding + +Before proceeding to deployment, ensure: +- [ ] The server starts without errors +- [ ] The original input example returns a valid response +- [ ] Streaming responses work correctly +- [ ] Custom inputs (thread_id, user_id) are handled properly (if applicable) + +> **Note:** Only proceed to Step 6 (Deploy) after confirming the agent works correctly locally. + +--- + +## Step 6: Deploy to Databricks Apps + +> **Task:** Mark "Test agent locally" as `completed`. Mark "Deploy to Databricks Apps" as `in_progress`. + +This step uses Databricks Asset Bundles (DAB) to deploy. The scaffold includes a `databricks.yml` that you need to update with the app name and resources from the original model. + +### 6.1 Extract Resources from Original Model + +The original model's `MLmodel` file contains a `resources` section that lists all Databricks resources the agent needs access to. Check `../original_mlflow_model/MLmodel` (or `./original_mlflow_model/MLmodel` if you're in the parent directory) for content like: + +```yaml +resources: + api_version: '1' + databricks: + lakebase: + - name: lakebase + serving_endpoint: + - name: databricks-claude-sonnet-4-5 +``` + +### 6.2 Update `databricks.yml` with Resources + +The scaffold includes a `databricks.yml` with the experiment resource pre-configured. You need to: + +1. **Update the app name** to `` (the name provided by the user) in both the `resources.apps.agent_migration.name` field and the `targets.prod.resources.apps.agent_migration.name` field. +2. **Add resources** extracted from the original MLmodel file to the `resources.apps.agent_migration.resources` list. 
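+
+To pull the `resources` block out of the original model metadata for reference, you can print everything from the `resources:` key onward (a simple sketch; it prints to the end of the file, so a few unrelated trailing keys may appear):
+
+```bash
+# Show the resources section of the original MLmodel file
+sed -n '/^resources:/,$p' ../original_mlflow_model/MLmodel
+```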
+ +**Resource Type Mapping (MLmodel → `databricks.yml`):** + +| MLmodel Resource | `databricks.yml` Resource | Key Fields | +|------------------|--------------------------|------------| +| `serving_endpoint` | `serving_endpoint` | `name`, `permission` (CAN_QUERY) | +| `lakebase` | `database` | `database_name: databricks_postgres`, `instance_name`, `permission` (CAN_CONNECT_AND_CREATE) | +| `vector_search_index` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` | +| `function` | `uc_securable` | `securable_full_name`, `securable_type: FUNCTION`, `permission: EXECUTE` | +| `table` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` | +| `uc_connection` | `uc_securable` | `securable_full_name`, `securable_type: CONNECTION`, `permission: USE_CONNECTION` | +| `sql_warehouse` | `sql_warehouse` | `id`, `permission` (CAN_USE) | +| `genie_space` | `genie_space` | `space_id`, `permission` (CAN_RUN) | + +> **Note:** The `experiment` resource is already configured in the scaffold `databricks.yml` and is automatically created by the bundle. You do not need to add it manually. + +**Example: `databricks.yml` for an agent with a serving endpoint and UC function:** + +```yaml +resources: + experiments: + agent_migration_experiment: + name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target} + + apps: + agent_migration: + name: "" # Update to user's app name + description: "Migrated agent from Model Serving to Databricks Apps" + source_code_path: ./ + resources: + - name: 'experiment' + experiment: + experiment_id: "${resources.experiments.agent_migration_experiment.id}" + permission: 'CAN_MANAGE' + - name: 'serving-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' + - name: 'python-exec' + uc_securable: + securable_full_name: 'system.ai.python_exec' + securable_type: 'FUNCTION' + permission: 'EXECUTE' + +targets: + prod: + resources: + apps: + agent_migration: + name: "" # Same name for production +``` + +**Example: Adding Lakebase resources (for stateful agents):** + +```yaml + - name: 'database' + database: + database_name: 'databricks_postgres' + instance_name: 'lakebase' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +### 6.3 Deploy with Databricks Asset Bundles + +From inside the `` directory, validate, deploy, and run: + +```bash +# 1. Validate bundle configuration (catches errors before deploy) +databricks bundle validate --profile + +# 2. Deploy the bundle (creates/updates resources, uploads files) +databricks bundle deploy --profile + +# 3. Run the app (starts/restarts with uploaded source code) - REQUIRED! +databricks bundle run agent_migration --profile +``` + +> **Important:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code! + +### 6.4 Test Deployed App + +> **Task:** Mark "Deploy to Databricks Apps" as `completed`. Mark "Test deployed app" as `in_progress`. 
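+
+Before sending requests, confirm the app has finished starting. This reuses the status command from the debug list in section 6.5; the app name is whatever you set in `databricks.yml`:
+
+```bash
+# Check status before querying; wait until the app and its compute report a healthy state
+databricks apps get YOUR_APP_NAME --profile YOUR_PROFILE --output json | jq '{app_status, compute_status}'
+```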
+ +```bash +# Get the app URL +APP_URL=$(databricks apps get --profile --output json | jq -r '.url') + +# Get OAuth token +TOKEN=$(databricks auth token --profile | jq -r .access_token) + +# Query the app +curl -X POST ${APP_URL}/invocations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hello!"}]}' +``` + +Once the deployed app responds successfully: + +> **Task:** Mark "Test deployed app" as `completed`. Migration complete! + +### 6.5 Deployment Troubleshooting + +If you encounter issues during deployment, refer to the **deploy** skill for detailed guidance. + +**Debug commands:** +```bash +# Validate bundle configuration +databricks bundle validate --profile + +# View app logs +databricks apps logs --profile --follow + +# Check app status +databricks apps get --profile --output json | jq '{app_status, compute_status}' + +# Get app URL +databricks apps get --profile --output json | jq -r '.url' +``` + +**"App already exists" error:** +If `databricks bundle deploy` fails because the app already exists, refer to the **deploy** skill for instructions on binding an existing app to the bundle. + +--- + +## Reference: App File Structure + +``` +/ +├── agent_server/ +│ ├── __init__.py +│ ├── agent.py # Main agent logic - THIS IS WHERE YOU MIGRATE TO +│ ├── start_server.py # FastAPI server setup +│ ├── utils.py # Helper utilities +│ └── evaluate_agent.py # Agent evaluation +├── scripts/ +│ ├── __init__.py +│ ├── quickstart.py # Setup script +│ └── start_app.py # App startup +├── databricks.yml # Databricks Asset Bundle configuration (resources, config, targets) +├── pyproject.toml # Dependencies (for local dev with uv) +├── requirements.txt # REQUIRED: Must contain "uv" for Databricks Apps +├── .env.example # Environment template +└── README.md +``` + +> **IMPORTANT:** The `requirements.txt` file must exist and contain `uv` so that Databricks Apps can install dependencies using the `pyproject.toml`. Without this file, the app will fail to start. + +--- + +## Reference: Common Migration Patterns + +### Pattern 1: Simple Chat Agent + +**Original:** +```python +class ChatAgent(ResponsesAgent): + def predict(self, request, params=None): + messages = to_chat_completions_input(request.input) + response = self.llm.invoke(messages) + return ResponsesAgentResponse(output=[...]) +``` + +**Migrated (sync):** +```python +llm = ... # Move class-level init to module level + +@invoke() +def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + messages = to_chat_completions_input(request.input) + response = llm.invoke(messages) + return ResponsesAgentResponse(output=[...]) + +@stream() +def streaming(request: ResponsesAgentRequest): + # Original predict_stream() body, with self. removed + ... 
+``` + +**Migrated (async):** +```python +@invoke() +async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + outputs = [e.item async for e in streaming(request) if e.type == "response.output_item.done"] + return ResponsesAgentResponse(output=outputs) + +@stream() +async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: + messages = {"messages": to_chat_completions_input([i.model_dump() for i in request.input])} + agent = await init_agent() + async for event in process_agent_astream_events(agent.astream(messages, stream_mode=["updates", "messages"])): + yield event +``` + +### Pattern 2: Agent with Custom Tools + +**Sync:** Keep tools as-is from the original code. + +**Async:** Migrate tools to async LangChain tools: + +```python +from langchain_core.tools import tool + +@tool +async def search_docs(query: str) -> str: + """Search the documentation.""" + results = await vector_store.asimilarity_search(query) + return format_results(results) +``` + +### Pattern 3: Using LangGraph with create_agent (async only) + +```python +from langchain.agents import create_agent +from databricks_langchain import ChatDatabricks + +async def init_agent(): + tools = await mcp_client.get_tools() # MCP tools are async + model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME) + return create_agent(model=model, tools=tools, system_prompt=SYSTEM_PROMPT) +``` + +--- + +## Reference: Useful Resources + +- **Responses API Docs:** https://mlflow.org/docs/latest/genai/serving/responses-agent/ +- **Agent Framework:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/ +- **Agent Tools:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool +- **databricks-langchain SDK:** https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain + +--- + +## Troubleshooting + +### "Module not found" errors +```bash +uv sync # Reinstall dependencies +``` + +### Authentication errors +```bash +databricks auth login # Re-authenticate +``` + +### Lakebase permission errors +- Ensure the Lakebase instance is added as an app resource in Databricks UI +- Grant appropriate permissions on the Lakebase instance + +### Async errors (async migration only) +- Ensure all I/O calls use async versions (e.g., `await client.achat()` not `client.chat()`) +- Use `async for` instead of `for` when iterating async generators +- If you chose sync migration, these errors should not occur — double-check that you're not mixing sync and async patterns diff --git a/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md b/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md new file mode 100644 index 00000000..0ac0d7e3 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md @@ -0,0 +1,147 @@ +--- +name: modify-agent +description: "Modify agent code, add tools, or change configuration. Use when: (1) User says 'modify agent', 'add tool', 'change model', or 'edit agent.py', (2) Adding MCP servers to agent, (3) Changing agent instructions, (4) Understanding SDK patterns." 
+--- + +# Modify the Agent + +## Main File + +**`agent_server/agent.py`** - Agent logic, model selection, instructions, MCP servers + +## Key Files + +| File | Purpose | +| -------------------------------- | --------------------------------------------- | +| `agent_server/agent.py` | Agent logic, model, instructions, MCP servers | +| `agent_server/start_server.py` | FastAPI server + MLflow setup | +| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers | +| `agent_server/utils.py` | Databricks auth helpers, stream processing | +| `databricks.yml` | Bundle config & resource permissions | + +## SDK Setup + +```python +import mlflow +from databricks_openai import AsyncDatabricksOpenAI +from agents import set_default_openai_api, set_default_openai_client, Agent +from agents.tracing import set_trace_processors + +# Set up async client (recommended for agent servers) +set_default_openai_client(AsyncDatabricksOpenAI()) +set_default_openai_api("chat_completions") + +# Use MLflow for tracing (disables SDK's built-in tracing) +set_trace_processors([]) +mlflow.openai.autolog() +``` + +## Adding MCP Servers + +```python +from databricks_openai.agents import McpServer + +# UC Functions +uc_server = McpServer( + url=f"{host}/api/2.0/mcp/functions/{catalog}/{schema}", + name="uc functions", +) + +# Genie Space +genie_server = McpServer( + url=f"{host}/api/2.0/mcp/genie/{space_id}", + name="genie space", +) + +# Vector Search +vector_server = McpServer( + url=f"{host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index}", + name="vector search", +) + +# Add to agent +agent = Agent( + name="my agent", + instructions="You are a helpful agent.", + model="databricks-claude-3-7-sonnet", + mcp_servers=[uc_server, genie_server, vector_server], +) +``` + +**After adding MCP servers:** Grant permissions in `databricks.yml` (see **add-tools** skill) + +## Changing the Model + +Available models (check workspace for current list): + +- `databricks-claude-3-7-sonnet` +- `databricks-claude-3-5-sonnet` +- `databricks-meta-llama-3-3-70b-instruct` + +```python +agent = Agent( + name="my agent", + model="databricks-claude-3-7-sonnet", # Change here + ... +) +``` + +**Note:** Some workspaces require granting the app access to the serving endpoint in `databricks.yml`. See the **add-tools** skill and `examples/serving-endpoint.yaml`. + +## Changing Instructions + +```python +agent = Agent( + name="my agent", + instructions="""You are a helpful data analyst assistant. + + You have access to: + - Company sales data via Genie + - Product documentation via vector search + + Always cite your sources when answering questions.""", + ... +) +``` + +## Running the Agent + +```python +from agents import Runner + +# Non-streaming +messages = [{"role": "user", "content": "hi"}] +result = await Runner.run(agent, messages) + +# Streaming +result = Runner.run_streamed(agent, input=messages) +async for event in result.stream_events(): + # Process stream events + pass +``` + +**Converting to Responses API format:** Use `process_agent_stream_events()` from `agent_server/utils.py` to convert streaming output to Responses API compatible format: + +```python +from agent_server.utils import process_agent_stream_events + +result = Runner.run_streamed(agent, input=messages) +async for event in process_agent_stream_events(result.stream_events()): + yield event # Yields ResponsesAgentStreamEvent objects +``` + +## External Resources + +1. 
[databricks-openai SDK](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/openai) +2. [Agent examples](https://github.com/databricks/app-templates) +3. [Agent Framework docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/) +4. [Adding tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool) +5. [OpenAI Agents SDK](https://platform.openai.com/docs/guides/agents-sdk) +6. [Responses API](https://mlflow.org/docs/latest/genai/serving/responses-agent/) + +## Next Steps + +- Discover available tools: see **discover-tools** skill +- Grant resource permissions: see **add-tools** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/quickstart/SKILL.md b/agent-supervisor-api/.claude/skills/quickstart/SKILL.md new file mode 100644 index 00000000..e550162c --- /dev/null +++ b/agent-supervisor-api/.claude/skills/quickstart/SKILL.md @@ -0,0 +1,83 @@ +--- +name: quickstart +description: "Set up Databricks agent development environment. Use when: (1) First time setup, (2) Configuring Databricks authentication, (3) User says 'quickstart', 'set up', 'authenticate', or 'configure databricks', (4) No .env file exists." +--- + +# Quickstart & Authentication + +## Prerequisites + +- **uv** (Python package manager) +- **nvm** with Node 20 (for frontend) +- **Databricks CLI v0.283.0+** + +Check CLI version: +```bash +databricks -v # Must be v0.283.0 or above +brew upgrade databricks # If version is too old +``` + +## Run Quickstart + +```bash +uv run quickstart +``` + +**Options:** +- `--profile NAME`: Use specified profile (non-interactive) +- `--host URL`: Workspace URL for initial setup +- `-h, --help`: Show help + +**Examples:** +```bash +# Interactive (prompts for profile selection) +uv run quickstart + +# Non-interactive with existing profile +uv run quickstart --profile DEFAULT + +# New workspace setup +uv run quickstart --host https://your-workspace.cloud.databricks.com +``` + +## What Quickstart Configures + +Creates/updates `.env` with: +- `DATABRICKS_CONFIG_PROFILE` - Selected CLI profile +- `MLFLOW_TRACKING_URI` - Set to `databricks://` for local auth +- `MLFLOW_EXPERIMENT_ID` - Auto-created experiment ID + +## Manual Authentication (Fallback) + +If quickstart fails: + +```bash +# Create new profile +databricks auth login --host https://your-workspace.cloud.databricks.com + +# Verify +databricks auth profiles +``` + +Then manually create `.env` (copy from `.env.example`): +```bash +# Authentication (choose one method) +DATABRICKS_CONFIG_PROFILE=DEFAULT +# DATABRICKS_HOST=https://.databricks.com +# DATABRICKS_TOKEN=dapi.... + +# MLflow configuration +MLFLOW_EXPERIMENT_ID= +MLFLOW_TRACKING_URI="databricks://DEFAULT" +MLFLOW_REGISTRY_URI="databricks-uc" + +# Frontend proxy settings +CHAT_APP_PORT=3000 +CHAT_PROXY_TIMEOUT_SECONDS=300 +``` + +## Next Steps + +After quickstart completes: +1. Run `uv run discover-tools` to find available workspace resources (see **discover-tools** skill) +2. Run `uv run start-app` to test locally (see **run-locally** skill) diff --git a/agent-supervisor-api/.claude/skills/run-locally/SKILL.md b/agent-supervisor-api/.claude/skills/run-locally/SKILL.md new file mode 100644 index 00000000..3eb83c82 --- /dev/null +++ b/agent-supervisor-api/.claude/skills/run-locally/SKILL.md @@ -0,0 +1,90 @@ +--- +name: run-locally +description: "Run and test the agent locally. 
Use when: (1) User says 'run locally', 'start server', 'test agent', or 'localhost', (2) Need curl commands to test API, (3) Troubleshooting local development issues, (4) Configuring server options like port or hot-reload." +--- + +# Run Agent Locally + +## Start the Server + +```bash +uv run start-app +``` + +This starts the agent at http://localhost:8000 + +## Server Options + +```bash +# Hot-reload on code changes (development) +uv run start-server --reload + +# Custom port +uv run start-server --port 8001 + +# Multiple workers (production-like) +uv run start-server --workers 4 + +# Combine options +uv run start-server --reload --port 8001 +``` + +## Test the API + +**Streaming request:** +```bash +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' +``` + +**Non-streaming request:** +```bash +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' +``` + +## Run Evaluation + +```bash +uv run agent-evaluate +``` + +Uses MLflow scorers (RelevanceToQuery, Safety). + +## Run Unit Tests + +```bash +pytest [path] +``` + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| **Port already in use** | Use `--port 8001` or kill existing process | +| **Authentication errors** | Verify `.env` is correct; run **quickstart** skill | +| **Module not found** | Run `uv sync` to install dependencies | +| **MLflow experiment not found** | Ensure `MLFLOW_TRACKING_URI` in `.env` is `databricks://` | + +### MLflow Experiment Not Found + +If you see: "The provided MLFLOW_EXPERIMENT_ID environment variable value does not exist" + +**Verify the experiment exists:** +```bash +databricks -p experiments get-experiment +``` + +**Fix:** Ensure `.env` has the correct tracking URI format: +```bash +MLFLOW_TRACKING_URI="databricks://DEFAULT" # Include profile name +``` + +The quickstart script configures this automatically. If you manually edited `.env`, ensure the profile name is included. + +## Next Steps + +- Modify your agent: see **modify-agent** skill +- Deploy to Databricks: see **deploy** skill diff --git a/agent-supervisor-api/AGENTS.md b/agent-supervisor-api/AGENTS.md new file mode 100644 index 00000000..78935cc5 --- /dev/null +++ b/agent-supervisor-api/AGENTS.md @@ -0,0 +1,115 @@ +# Agent Development Guide + +## MANDATORY First Actions + +**Ask the user interactively:** + +1. **App deployment target:** + > "Do you have an existing Databricks app you want to deploy to, or should we create a new one? If existing, what's the app name?" + + *Note: New apps should use the `agent-*` prefix (e.g., `agent-data-analyst`) unless the user specifies otherwise.* + +**Then check authentication and profile configuration:** + +1. Read the `.env` file to find `DATABRICKS_CONFIG_PROFILE` (e.g., `dev`) +2. Run `databricks auth profiles` to verify the profile is configured and valid + +**CRITICAL: All `databricks` CLI commands must include the profile from `.env`.** Either use `--profile` or set the env var: + +```bash +databricks --profile +# or +DATABRICKS_CONFIG_PROFILE= databricks +``` + +If no profiles exist or `.env` is missing, guide the user through running `uv run quickstart` to set up authentication and configuration. + +## Understanding User Goals + +**Ask the user questions to understand what they're building:** + +1. 
**What is the agent's purpose?** (e.g., data analyst assistant, customer support, code helper) +2. **What data or tools does it need access to?** + - Unity Catalog functions (SQL UDFs, Python UDFs) + - Genie Spaces for natural language data queries + - Agent endpoints for specialized sub-agents + - External MCP servers via UC connections + +Use `uv run discover-tools` to show available resources in their workspace, then help them select the right ones. + +## Hosted Tool Types + +The Supervisor API supports these tool types. Each is specified in the `TOOLS` list in `agent_server/agent.py`: + +| Type | Description | Required keys | +|---|---|---| +| `uc_function` | Calls a UC function (SQL or Python UDF) | `name`, `name_alias`, `description` | +| `genie` | Queries a Genie space to answer data questions | `name`, `description`, `space_id` | +| `agent_endpoint` | Delegates to an existing agent endpoint | `name`, `description`, `endpoint_name` | +| `mcp` | Connects to an external MCP server via a UC connection | `name`, `description`, `connection_name` | + +**For each tool added**, also add the corresponding resource permission in `databricks.yml`. See the **add-tools** skill for examples. + +## Handling Deployment Errors + +**If `databricks bundle deploy` fails with "An app with the same name already exists":** + +Ask the user: "I see there's an existing app with the same name. Would you like me to bind it to this bundle so we can manage it, or delete it and create a new one?" + +- **If they want to bind**: See the **deploy** skill for binding steps +- **If they want to delete**: Run `databricks apps delete ` then deploy again + +--- + +## Available Skills + +**Before executing any task, read the relevant skill file in `.claude/skills/`** - they contain tested commands, patterns, and troubleshooting steps. + +| Task | Skill | Path | +|------|-------|------| +| Setup, auth, first-time | **quickstart** | `.claude/skills/quickstart/SKILL.md` | +| Find tools/resources | **discover-tools** | `.claude/skills/discover-tools/SKILL.md` | +| Deploy to Databricks | **deploy** | `.claude/skills/deploy/SKILL.md` | +| Add tools & permissions | **add-tools** | `.claude/skills/add-tools/SKILL.md` | +| Run/test locally | **run-locally** | `.claude/skills/run-locally/SKILL.md` | +| Modify agent code | **modify-agent** | `.claude/skills/modify-agent/SKILL.md` | + +--- + +## Quick Commands + +| Task | Command | +|------|---------| +| Setup | `uv run quickstart` | +| Discover tools | `uv run discover-tools` | +| Run locally | `uv run start-app` | +| Deploy | `databricks bundle deploy && databricks bundle run agent_supervisor_api` | +| View logs | `databricks apps logs --follow` | + +--- + +## Key Files + +| File | Purpose | +|------|---------| +| `agent_server/agent.py` | Model, tools list, invoke/stream handlers | +| `agent_server/start_server.py` | FastAPI server + MLflow setup | +| `databricks.yml` | Bundle config & resource permissions | +| `scripts/quickstart.py` | One-command setup script | +| `scripts/discover_tools.py` | Discovers available workspace resources | + +--- + +## Agent Framework Capabilities + +> **⚠️ IMPORTANT:** When adding any tool to the agent, you MUST also grant permissions in `databricks.yml`. See the **add-tools** skill for required steps and examples. + +**Key difference from other templates**: This template offloads the agent loop to Databricks via the Supervisor API. 
You do not need to implement tool execution logic in Python — just declare hosted tools and Databricks handles the rest. + +**Common Patterns:** +- **Structured data retrieval** - Use `genie` tool type to query SQL tables/databases +- **Code interpreter** - Use `uc_function` with `system.ai.python_exec` for Python execution +- **Sub-agent delegation** - Use `agent_endpoint` to call specialized agents +- **External services** - Use `mcp` with a UC connection for external MCP servers + +Reference: https://docs.databricks.com/aws/en/generative-ai/agent-bricks/supervisor-api.html diff --git a/agent-supervisor-api/CLAUDE.md b/agent-supervisor-api/CLAUDE.md new file mode 100644 index 00000000..43c994c2 --- /dev/null +++ b/agent-supervisor-api/CLAUDE.md @@ -0,0 +1 @@ +@AGENTS.md diff --git a/agent-supervisor-api/README.md b/agent-supervisor-api/README.md new file mode 100644 index 00000000..058b8c3a --- /dev/null +++ b/agent-supervisor-api/README.md @@ -0,0 +1,101 @@ +# Agent using Supervisor API + +This template defines a conversational agent app that uses the [Databricks Supervisor API](https://docs.databricks.com/aws/en/generative-ai/agent-bricks/supervisor-api.html) for server-side tool execution. The app comes with a built-in chat UI, but also exposes an API endpoint for invoking the agent so that you can serve your UI elsewhere. + +Instead of managing an agent loop in application code, this template passes the model, tools, and input to a single Databricks endpoint. Databricks handles tool selection and response synthesis, so your agent code stays minimal. + +The Supervisor API is multi-AI: swap the model name (e.g. from `databricks-claude-sonnet-4-5` to `databricks-gpt-5-2`) to change AI providers without modifying tool or agent logic. + +## Requirements + +- AI Gateway (Beta) enabled for your account. See [Manage previews](https://docs.databricks.com/aws/en/admin/workspace-settings/manage-previews.html). +- `uv` (Python package manager), `nvm` (Node version manager), and the Databricks CLI installed locally. + +> **Note**: The Supervisor API is routed through AI Gateway at `/mlflow/v1/responses`, not through the standard model serving endpoint at `/serving-endpoints`. The template configures `DatabricksOpenAI` with `base_url=f"{host}/mlflow/v1"` to point to the correct endpoint. + +## Build with AI Assistance + +We recommend using AI coding assistants (Claude Code, Cursor, GitHub Copilot) to customize and deploy this template. Agent Skills in `.claude/skills/` provide step-by-step guidance for common tasks like setup, adding tools, and deployment. These skills are automatically detected by Claude, Cursor, and GitHub Copilot. + +## Quick start + +Run the `uv run quickstart` script to quickly set up your local environment and start the agent server. At any step, if there are issues, refer to the manual local development loop setup below. + +This script will: + +1. Verify uv, nvm, and Databricks CLI installations +2. Configure Databricks authentication +3. Configure agent tracing, by creating and linking an MLflow experiment to your app +4. Start the agent server and chat app + +```bash +uv run quickstart +``` + +After the setup is complete, you can start the agent server and the chat app locally with: + +```bash +uv run start-app +``` + +This will start the agent server and the chat app at http://localhost:8000. + +**Next steps**: see [modifying your agent](#modifying-your-agent) to customize and iterate on the agent code. 
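+
+You can also call the agent's API endpoint directly while the server is running; the built-in chat UI proxies to the same `/invocations` route. Add `"stream": true` to the payload to receive streaming events instead of a single response:
+
+```bash
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{ "input": [{ "role": "user", "content": "hi" }] }'
+```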
+ +## Modifying your agent + +The key files for customizing this template: + +| File | Purpose | +|---|---| +| `agent_server/agent.py` | Agent logic: model, tools, handlers | +| `databricks.yml` | Bundle config and resource permissions | + +### Changing the model + +Edit the `MODEL` variable in `agent_server/agent.py`: + +```python +MODEL = "databricks-gpt-5-2" # switch to any Databricks-hosted model +``` + +No other code changes are needed — the Supervisor API handles the rest. + +### Adding hosted tools + +Edit the `TOOLS` list in `agent_server/agent.py`. Supported tool types: `uc_function`, `genie`, `agent_endpoint`, `mcp`. For each tool you add, grant the corresponding permission in `databricks.yml`. See the `add-tools` skill for examples. + +## How the client is configured + +`DatabricksOpenAI` defaults to `{host}/serving-endpoints` as its base URL. The Supervisor API is a distinct endpoint served by AI Gateway at `{host}/mlflow/v1/responses`. The `_get_client()` helper in `agent_server/agent.py` overrides the base URL accordingly: + +```python +def _get_client() -> DatabricksOpenAI: + wc = WorkspaceClient() + return DatabricksOpenAI( + workspace_client=wc, + base_url=f"{wc.config.host}/mlflow/v1", + ) +``` + +Authentication is handled automatically by the `WorkspaceClient` using your configured Databricks CLI credentials. + +## Deploying to Databricks Apps + +```bash +databricks bundle deploy && databricks bundle run agent_supervisor_api +``` + +After the first deployment, the app URL is printed. Subsequent deployments update the existing app in place. + +## Running tests + +```bash +uv run pytest tests/ -v +``` + +Unit tests run without credentials. Integration tests against the live Supervisor API require setting `ENG_ML_INFERENCE_TOKEN` (or `DATABRICKS_TOKEN` pointed at a workspace with AI Gateway enabled): + +```bash +ENG_ML_INFERENCE_TOKEN=dapi... uv run pytest tests/ -v +``` diff --git a/agent-supervisor-api/agent_server/__init__.py b/agent-supervisor-api/agent_server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/agent-supervisor-api/agent_server/agent.py b/agent-supervisor-api/agent_server/agent.py new file mode 100644 index 00000000..4844c930 --- /dev/null +++ b/agent-supervisor-api/agent_server/agent.py @@ -0,0 +1,72 @@ +import logging +from typing import AsyncGenerator + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, + ResponsesAgentStreamEvent, +) + +from agent_server.utils import get_session_id + +mlflow.openai.autolog() +logging.getLogger("mlflow.utils.autologging_utils").setLevel(logging.ERROR) + +# Model name controls which AI provider runs the agent loop. +# Swap to any Databricks-hosted model without changing your tool or agent code. +MODEL = "databricks-claude-sonnet-4-5" + +# Hosted tools — the Supervisor API runs the tool-selection and synthesis loop +# server-side. Add or remove tool definitions here to change agent behavior. 
+TOOLS = [ + { + "type": "uc_function", + "uc_function": { + "name": "system.ai.python_exec", + "name_alias": "python_exec", + "description": "Execute Python code to perform calculations, data analysis, or string processing.", + }, + } +] + + +def _get_client() -> DatabricksOpenAI: + # The Supervisor API is served at /mlflow/v1/responses on the AI Gateway, + # not at /serving-endpoints (the default for DatabricksOpenAI). Pass the + # base_url explicitly so responses.create() hits the right endpoint. + wc = WorkspaceClient() + return DatabricksOpenAI( + workspace_client=wc, + base_url=f"{wc.config.host}/mlflow/v1", + ) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + if session_id := get_session_id(request): + mlflow.update_current_trace(metadata={"mlflow.trace.session": session_id}) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=response.output) + + +@stream() +def stream_handler( + request: ResponsesAgentRequest, +) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: + if session_id := get_session_id(request): + mlflow.update_current_trace(metadata={"mlflow.trace.session": session_id}) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) diff --git a/agent-supervisor-api/agent_server/evaluate_agent.py b/agent-supervisor-api/agent_server/evaluate_agent.py new file mode 100644 index 00000000..b816ef4b --- /dev/null +++ b/agent-supervisor-api/agent_server/evaluate_agent.py @@ -0,0 +1,100 @@ +import asyncio +import logging + +import mlflow +from dotenv import load_dotenv +from mlflow.genai.agent_server import get_invoke_function +from mlflow.genai.scorers import ( + Completeness, + ConversationalSafety, + ConversationCompleteness, + Fluency, + KnowledgeRetention, + RelevanceToQuery, + Safety, + ToolCallCorrectness, + UserFrustration, +) +from mlflow.genai.simulators import ConversationSimulator +from mlflow.types.responses import ResponsesAgentRequest + +# Load environment variables from .env if it exists +load_dotenv(dotenv_path=".env", override=True) +logging.getLogger("mlflow.utils.autologging_utils").setLevel(logging.ERROR) + +# need to import agent for our @invoke-registered function to be found +from agent_server import agent # noqa: F401 + +# Create your evaluation dataset +# Refer to documentation for evaluations: +# Scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/concepts/scorers +# Predefined LLM scorers: https://mlflow.org/docs/latest/genai/eval-monitor/scorers/llm-judge/predefined +# Defining custom scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/custom-scorers +test_cases = [ + { + "goal": "Learn about the main dishes of Vietnamese cuisine", + "persona": "An impatient foodie who doesn't know much about Vietnamese cuisine.", + "simulation_guidelines": [ + "Initially explore the main influences of Vietnamese cuisine before the main dishes.", + ], + }, + { + "goal": "Figure out which prime numbers between 1 and 50 are also Fibonacci numbers", + "persona": "You are a math novice who has heard of prime numbers but doesn't know what Fibonacci numbers are.", + "simulation_guidelines": [ + "Initially ask questions to understand the Fibonacci sequence before exploring which ones are prime.", + "Prefer short messages", + ], + }, +] + +simulator = ConversationSimulator( + 
test_cases=test_cases, + max_turns=5, + user_model="databricks:/databricks-claude-sonnet-4-5", +) + +# Get the invoke function that was registered via @invoke decorator in your agent +invoke_fn = get_invoke_function() +assert invoke_fn is not None, ( + "No function registered with the `@invoke` decorator found." + "Ensure you have a function decorated with `@invoke()`." +) + +# if invoke function is async, wrap it in a sync function. +# The simulator may already be running an event loop, so we use nest_asyncio +# to allow nested run_until_complete() calls without deadlocking. +if asyncio.iscoroutinefunction(invoke_fn): + import nest_asyncio + + nest_asyncio.apply() + + def predict_fn(input: list[dict], **kwargs) -> dict: + req = ResponsesAgentRequest(input=input) + loop = asyncio.get_event_loop() + response = loop.run_until_complete(invoke_fn(req)) + return response.model_dump() +else: + + def predict_fn(input: list[dict], **kwargs) -> dict: + req = ResponsesAgentRequest(input=input) + response = invoke_fn(req) + return response.model_dump() + + +def evaluate(): + mlflow.genai.evaluate( + data=simulator, + predict_fn=predict_fn, + scorers=[ + Completeness(), + ConversationCompleteness(), + ConversationalSafety(), + KnowledgeRetention(), + UserFrustration(), + Fluency(), + RelevanceToQuery(), + Safety(), + ToolCallCorrectness(), + ], + ) diff --git a/agent-supervisor-api/agent_server/start_server.py b/agent-supervisor-api/agent_server/start_server.py new file mode 100644 index 00000000..1d5ecd0c --- /dev/null +++ b/agent-supervisor-api/agent_server/start_server.py @@ -0,0 +1,17 @@ +from dotenv import load_dotenv +from mlflow.genai.agent_server import AgentServer, setup_mlflow_git_based_version_tracking + +# Load env vars from .env before importing the agent for proper auth +load_dotenv(dotenv_path=".env", override=True) + +# Need to import the agent to register the functions with the server +import agent_server.agent # noqa: E402 + +agent_server = AgentServer("ResponsesAgent", enable_chat_proxy=True) +# Define the app as a module level variable to enable multiple workers +app = agent_server.app # noqa: F841 +setup_mlflow_git_based_version_tracking() + + +def main(): + agent_server.run(app_import_string="agent_server.start_server:app") diff --git a/agent-supervisor-api/agent_server/utils.py b/agent-supervisor-api/agent_server/utils.py new file mode 100644 index 00000000..4fb82302 --- /dev/null +++ b/agent-supervisor-api/agent_server/utils.py @@ -0,0 +1,9 @@ +from mlflow.types.responses import ResponsesAgentRequest + + +def get_session_id(request: ResponsesAgentRequest) -> str | None: + if request.context and request.context.conversation_id: + return request.context.conversation_id + if request.custom_inputs and isinstance(request.custom_inputs, dict): + return request.custom_inputs.get("session_id") + return None diff --git a/agent-supervisor-api/app.yaml b/agent-supervisor-api/app.yaml new file mode 100644 index 00000000..34465373 --- /dev/null +++ b/agent-supervisor-api/app.yaml @@ -0,0 +1,16 @@ +command: ["uv", "run", "start-app"] +# databricks apps listen by default on port 8000 + +env: + - name: MLFLOW_TRACKING_URI + value: "databricks" + - name: MLFLOW_REGISTRY_URI + value: "databricks-uc" + - name: API_PROXY + value: "http://localhost:8000/invocations" + - name: CHAT_APP_PORT + value: "3000" + - name: CHAT_PROXY_TIMEOUT_SECONDS + value: "300" + - name: MLFLOW_EXPERIMENT_ID + valueFrom: "experiment" diff --git a/agent-supervisor-api/databricks.yml b/agent-supervisor-api/databricks.yml 
new file mode 100644 index 00000000..cb0f304e --- /dev/null +++ b/agent-supervisor-api/databricks.yml @@ -0,0 +1,51 @@ +bundle: + name: agent_supervisor_api + +resources: + apps: + agent_supervisor_api: + name: "agent-supervisor-api" + description: "Supervisor API agent application" + source_code_path: ./ + config: + command: ["uv", "run", "start-app"] + env: + - name: MLFLOW_TRACKING_URI + value: "databricks" + - name: MLFLOW_REGISTRY_URI + value: "databricks-uc" + - name: API_PROXY + value: "http://localhost:8000/invocations" + - name: CHAT_APP_PORT + value: "3000" + - name: CHAT_PROXY_TIMEOUT_SECONDS + value: "300" + - name: MLFLOW_EXPERIMENT_ID + value_from: "experiment" + + # Resources which this app has access to + resources: + - name: 'experiment' + experiment: + experiment_id: "" + permission: 'CAN_MANAGE' + - name: 'python_exec' + unity_catalog_function: + function_full_name: "system.ai.python_exec" + permission: 'EXECUTE' + +targets: + dev: + mode: development + default: true + # workspace: + # host: https://... + + prod: + mode: production + # workspace: + # host: https://... + resources: + apps: + agent_supervisor_api: + name: agent-supervisor-api diff --git a/agent-supervisor-api/pyproject.toml b/agent-supervisor-api/pyproject.toml new file mode 100644 index 00000000..037cf786 --- /dev/null +++ b/agent-supervisor-api/pyproject.toml @@ -0,0 +1,36 @@ +[project] +name = "agent-server" +version = "0.1.0" +description = "MLflow-compatible agent server using the Databricks Supervisor API" +readme = "README.md" +authors = [ + { name = "Agent Developer", email = "developer@example.com" } +] +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.129.0", + "uvicorn>=0.41.0", + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", + "databricks-agents>=1.9.3", + "mlflow>=3.10.0", + "python-dotenv>=1.2.1", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "hatchling>=1.28.0", + "pytest>=9.0.2", +] + + +[project.scripts] +quickstart = "scripts.quickstart:main" +start-app = "scripts.start_app:main" +start-server = "agent_server.start_server:main" +agent-evaluate = "agent_server.evaluate_agent:evaluate" +discover-tools = "scripts.discover_tools:main" diff --git a/agent-supervisor-api/requirements.txt b/agent-supervisor-api/requirements.txt new file mode 100644 index 00000000..60cc5e6a --- /dev/null +++ b/agent-supervisor-api/requirements.txt @@ -0,0 +1 @@ +uv diff --git a/agent-supervisor-api/scripts/__init__.py b/agent-supervisor-api/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/agent-supervisor-api/scripts/discover_tools.py b/agent-supervisor-api/scripts/discover_tools.py new file mode 100755 index 00000000..3eb37963 --- /dev/null +++ b/agent-supervisor-api/scripts/discover_tools.py @@ -0,0 +1,432 @@ +#!/usr/bin/env python3 +""" +Discover available tools and data sources for Databricks agents. + +This script scans for: +- Unity Catalog functions (data retrieval tools e.g. 
SQL UDFs) +- Unity Catalog tables (data sources) +- Vector search indexes (RAG data sources) +- Genie spaces (conversational interface over structured data) +- Custom MCP servers (Databricks apps with name mcp-*) +- External MCP servers (via Unity Catalog connections) +""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import Any, Dict, List + +from databricks.sdk import WorkspaceClient + +DEFAULT_MAX_RESULTS = 100 +DEFAULT_MAX_SCHEMAS = 25 + +def run_databricks_cli(args: List[str]) -> str: + """Run databricks CLI command and return output.""" + try: + result = subprocess.run( + ["databricks"] + args, + capture_output=True, + text=True, + check=True, + ) + return result.stdout + except subprocess.CalledProcessError as e: + print(f"Error running databricks CLI: {e.stderr}", file=sys.stderr) + return "" + + +def discover_uc_functions(w: WorkspaceClient, catalog: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: + """Discover Unity Catalog functions that could be used as tools. + + Args: + w: WorkspaceClient instance + catalog: Optional specific catalog to search + max_schemas: Total number of schemas to search across all catalogs + """ + functions = [] + schemas_searched = 0 + + try: + catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] + + for cat in catalogs: + if schemas_searched >= max_schemas: + break + + try: + all_schemas = list(w.schemas.list(catalog_name=cat)) + # Take schemas from this catalog until we hit the global budget + schemas_to_search = all_schemas[:max_schemas - schemas_searched] + + for schema in schemas_to_search: + schema_name = f"{cat}.{schema.name}" + try: + funcs = list(w.functions.list(catalog_name=cat, schema_name=schema.name)) + for func in funcs: + functions.append({ + "type": "uc_function", + "name": func.full_name, + "catalog": cat, + "schema": schema.name, + "function_name": func.name, + "comment": func.comment, + "routine_definition": getattr(func, "routine_definition", None), + }) + except Exception as e: + # Skip schemas we can't access + continue + finally: + schemas_searched += 1 + except Exception as e: + # Skip catalogs we can't access + continue + + except Exception as e: + print(f"Error discovering UC functions: {e}", file=sys.stderr) + + return functions + + +def discover_uc_tables(w: WorkspaceClient, catalog: str = None, schema: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: + """Discover Unity Catalog tables that could be queried. 
+ + Args: + w: WorkspaceClient instance + catalog: Optional specific catalog to search + schema: Optional specific schema to search (requires catalog) + max_schemas: Total number of schemas to search across all catalogs + """ + tables = [] + schemas_searched = 0 + + try: + catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] + + for cat in catalogs: + if cat in ["__databricks_internal", "system"]: + continue + + if schemas_searched >= max_schemas: + break + + try: + if schema: + schemas_to_search = [schema] + else: + all_schemas = [s.name for s in w.schemas.list(catalog_name=cat)] + # Take schemas from this catalog until we hit the global budget + schemas_to_search = all_schemas[:max_schemas - schemas_searched] + + for sch in schemas_to_search: + if sch == "information_schema": + schemas_searched += 1 + continue + + try: + tbls = list(w.tables.list(catalog_name=cat, schema_name=sch)) + for tbl in tbls: + # Get column info + columns = [] + if hasattr(tbl, "columns") and tbl.columns: + columns = [ + {"name": col.name, "type": col.type_name.value if hasattr(col.type_name, "value") else str(col.type_name)} + for col in tbl.columns + ] + + tables.append({ + "type": "uc_table", + "name": tbl.full_name, + "catalog": cat, + "schema": sch, + "table_name": tbl.name, + "table_type": tbl.table_type.value if tbl.table_type else None, + "comment": tbl.comment, + "columns": columns, + }) + except Exception as e: + # Skip schemas we can't access + pass + finally: + schemas_searched += 1 + except Exception as e: + # Skip catalogs we can't access + continue + + except Exception as e: + print(f"Error discovering UC tables: {e}", file=sys.stderr) + + return tables + + +def discover_vector_search_indexes(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover Vector Search indexes for RAG applications.""" + indexes = [] + + try: + # List all vector search endpoints + endpoints = list(w.vector_search_endpoints.list_endpoints()) + + for endpoint in endpoints: + try: + # List indexes for each endpoint + endpoint_indexes = list(w.vector_search_indexes.list_indexes(endpoint_name=endpoint.name)) + for idx in endpoint_indexes: + indexes.append({ + "type": "vector_search_index", + "name": idx.name, + "endpoint": endpoint.name, + "primary_key": idx.primary_key, + "index_type": idx.index_type.value if idx.index_type else None, + "status": idx.status.state.value if idx.status and idx.status.state else None, + }) + except Exception as e: + # Skip endpoints we can't access + continue + + except Exception as e: + print(f"Error discovering vector search indexes: {e}", file=sys.stderr) + + return indexes + + +def discover_genie_spaces(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover Genie spaces for conversational data access.""" + spaces = [] + + try: + # Use SDK to list genie spaces + response = w.genie.list_spaces() + genie_spaces = response.spaces if hasattr(response, "spaces") else [] + for space in genie_spaces: + spaces.append({ + "type": "genie_space", + "id": space.space_id, + "name": space.title, + "description": space.description, + }) + except Exception as e: + print(f"Error discovering Genie spaces: {e}", file=sys.stderr) + + return spaces + + + +def discover_custom_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover custom MCP servers deployed as Databricks apps.""" + custom_servers = [] + + try: + # List all apps and filter for those starting with mcp- + apps = w.apps.list() + for app in apps: + if app.name and app.name.startswith("mcp-"): + 
custom_servers.append({ + "type": "custom_mcp_server", + "name": app.name, + "url": app.url, + "status": app.app_status.state.value if app.app_status and app.app_status.state else None, + "description": app.description, + }) + except Exception as e: + print(f"Error discovering custom MCP servers: {e}", file=sys.stderr) + + return custom_servers + + +def discover_external_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover external MCP servers configured via Unity Catalog connections.""" + external_servers = [] + + try: + # List all connections and filter for MCP connections + connections = w.connections.list() + for conn in connections: + # Check if this is an MCP connection + if conn.options and conn.options.get("is_mcp_connection") == "true": + external_servers.append({ + "type": "external_mcp_server", + "name": conn.name, + "connection_type": conn.connection_type.value if hasattr(conn.connection_type, "value") else str(conn.connection_type), + "comment": conn.comment, + "full_name": conn.full_name, + }) + except Exception as e: + print(f"Error discovering external MCP servers: {e}", file=sys.stderr) + + return external_servers + + +def format_output_markdown(results: Dict[str, List[Dict[str, Any]]]) -> str: + """Format discovery results as markdown.""" + lines = ["# Agent Tools and Data Sources Discovery\n"] + + # UC Functions + functions = results.get("uc_functions", []) + if functions: + lines.append(f"## Unity Catalog Functions ({len(functions)})\n") + lines.append("**What they are:** SQL UDFs that can be used as agent tools.\n") + lines.append("**How to use:** Access via UC functions MCP server:") + lines.append("- All functions in a schema: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}`") + lines.append("- Single function: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}/{function_name}`\n") + for func in functions[:10]: # Show first 10 + lines.append(f"- `{func['name']}`") + if func.get("comment"): + lines.append(f" - {func['comment']}") + if len(functions) > 10: + lines.append(f"\n*...and {len(functions) - 10} more*\n") + lines.append("") + + # UC Tables + tables = results.get("uc_tables", []) + if tables: + lines.append(f"## Unity Catalog Tables ({len(tables)})\n") + lines.append("Structured data that agents can query via UC SQL functions.\n") + for table in tables[:10]: # Show first 10 + lines.append(f"- `{table['name']}` ({table['table_type']})") + if table.get("comment"): + lines.append(f" - {table['comment']}") + if table.get("columns"): + col_names = [c["name"] for c in table["columns"][:5]] + lines.append(f" - Columns: {', '.join(col_names)}") + if len(tables) > 10: + lines.append(f"\n*...and {len(tables) - 10} more*\n") + lines.append("") + + # Vector Search Indexes + indexes = results.get("vector_search_indexes", []) + if indexes: + lines.append(f"## Vector Search Indexes ({len(indexes)})\n") + lines.append("These can be used for RAG applications with unstructured data.\n") + lines.append("**How to use:** Connect via MCP server at `{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}` or\n") + lines.append("`{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index_name}`\n") + for idx in indexes: + lines.append(f"- `{idx['name']}`") + lines.append(f" - Endpoint: {idx['endpoint']}") + lines.append(f" - Status: {idx['status']}") + lines.append("") + + # Genie Spaces + spaces = results.get("genie_spaces", []) + if spaces: + lines.append(f"## Genie Spaces ({len(spaces)})\n") + lines.append("**What they are:** 
Natural language interface to your data\n") + lines.append("**How to use:** Connect via Genie MCP server at `{workspace_host}/api/2.0/mcp/genie/{space_id}`\n") + for space in spaces: + lines.append(f"- `{space['name']}` (ID: {space['id']})") + if space.get("description"): + lines.append(f" - {space['description']}") + lines.append("") + + # Custom MCP Servers (Databricks Apps) + custom_servers = results.get("custom_mcp_servers", []) + if custom_servers: + lines.append(f"## Custom MCP Servers ({len(custom_servers)})\n") + lines.append("**What:** Your own MCP servers deployed as Databricks Apps (names starting with mcp-)\n") + lines.append("**How to use:** Access via `{app_url}/mcp`\n") + lines.append("**⚠️ Important:** Custom MCP server apps require manual permission grants:") + lines.append("1. Get your agent app's service principal: `databricks apps get --output json | jq -r '.service_principal_name'`") + lines.append("2. Grant permission: `databricks apps update-permissions --service-principal --permission-level CAN_USE`") + lines.append("(Apps are not yet supported as resource dependencies in databricks.yml)\n") + for server in custom_servers: + lines.append(f"- `{server['name']}`") + if server.get("url"): + lines.append(f" - URL: {server['url']}") + if server.get("status"): + lines.append(f" - Status: {server['status']}") + if server.get("description"): + lines.append(f" - {server['description']}") + lines.append("") + + # External MCP Servers (UC Connections) + external_servers = results.get("external_mcp_servers", []) + if external_servers: + lines.append(f"## External MCP Servers ({len(external_servers)})\n") + lines.append("**What:** Third-party MCP servers via Unity Catalog connections\n") + lines.append("**How to use:** Connect via `{workspace_host}/api/2.0/mcp/external/{connection_name}`\n") + lines.append("**Benefits:** Secure access to external APIs through UC governance\n") + for server in external_servers: + lines.append(f"- `{server['name']}`") + if server.get("full_name"): + lines.append(f" - Full name: {server['full_name']}") + if server.get("comment"): + lines.append(f" - {server['comment']}") + lines.append("") + return "\n".join(lines) + + +def main(): + """Main discovery function.""" + import argparse + + parser = argparse.ArgumentParser(description="Discover available agent tools and data sources") + parser.add_argument("--catalog", help="Limit discovery to specific catalog") + parser.add_argument("--schema", help="Limit discovery to specific schema (requires --catalog)") + parser.add_argument("--format", choices=["json", "markdown"], default="markdown", help="Output format") + parser.add_argument("--output", help="Output file (default: stdout)") + parser.add_argument("--profile", help="Databricks CLI profile to use (default: uses default profile)") + parser.add_argument("--max-results", type=int, default=DEFAULT_MAX_RESULTS, help=f"Maximum results per resource type (default: {DEFAULT_MAX_RESULTS})") + parser.add_argument("--max-schemas", type=int, default=DEFAULT_MAX_SCHEMAS, help=f"Total schemas to search across all catalogs (default: {DEFAULT_MAX_SCHEMAS})") + + args = parser.parse_args() + + if args.schema and not args.catalog: + print("Error: --schema requires --catalog", file=sys.stderr) + sys.exit(1) + + print("Discovering available tools and data sources...", file=sys.stderr) + + # Initialize Databricks workspace client + # Only pass profile if specified, otherwise use default + if args.profile: + w = WorkspaceClient(profile=args.profile) + else: + w = 
WorkspaceClient() + + results = {} + + # Discover each type with configurable limits + print("- UC Functions...", file=sys.stderr) + results["uc_functions"] = discover_uc_functions(w, catalog=args.catalog, max_schemas=args.max_schemas)[:args.max_results] + + print("- UC Tables...", file=sys.stderr) + results["uc_tables"] = discover_uc_tables(w, catalog=args.catalog, schema=args.schema, max_schemas=args.max_schemas)[:args.max_results] + + print("- Vector Search Indexes...", file=sys.stderr) + results["vector_search_indexes"] = discover_vector_search_indexes(w)[:args.max_results] + + print("- Genie Spaces...", file=sys.stderr) + results["genie_spaces"] = discover_genie_spaces(w)[:args.max_results] + + print("- Custom MCP Servers (Apps)...", file=sys.stderr) + results["custom_mcp_servers"] = discover_custom_mcp_servers(w)[:args.max_results] + + print("- External MCP Servers (Connections)...", file=sys.stderr) + results["external_mcp_servers"] = discover_external_mcp_servers(w)[:args.max_results] + + # Format output + if args.format == "json": + output = json.dumps(results, indent=2) + else: + output = format_output_markdown(results) + + # Write output + if args.output: + Path(args.output).write_text(output) + print(f"\nResults written to {args.output}", file=sys.stderr) + else: + print("\n" + output) + + # Print summary + print("\n=== Discovery Summary ===", file=sys.stderr) + print(f"UC Functions: {len(results['uc_functions'])}", file=sys.stderr) + print(f"UC Tables: {len(results['uc_tables'])}", file=sys.stderr) + print(f"Vector Search Indexes: {len(results['vector_search_indexes'])}", file=sys.stderr) + print(f"Genie Spaces: {len(results['genie_spaces'])}", file=sys.stderr) + print(f"Custom MCP Servers: {len(results['custom_mcp_servers'])}", file=sys.stderr) + print(f"External MCP Servers: {len(results['external_mcp_servers'])}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/agent-supervisor-api/scripts/quickstart.py b/agent-supervisor-api/scripts/quickstart.py new file mode 100644 index 00000000..59982e03 --- /dev/null +++ b/agent-supervisor-api/scripts/quickstart.py @@ -0,0 +1,768 @@ +#!/usr/bin/env python3 +""" +Quickstart setup script for Databricks agent development. 
+ +This script handles: +- Checking prerequisites (uv, nvm, Node 20, Databricks CLI) +- Databricks authentication (OAuth) +- MLflow experiment creation +- Environment variable configuration (.env) +- Lakebase instance setup (for memory-enabled templates) + +Usage: + uv run quickstart [OPTIONS] + +Options: + --profile NAME Use specified Databricks profile (non-interactive) + --host URL Databricks workspace URL (for initial setup) + --lakebase NAME Lakebase instance name (for memory features) + -h, --help Show this help message +""" + +import argparse +import json +import os +import platform +import re +import secrets +import shutil +import subprocess +import sys +from pathlib import Path + + +def print_header(text: str) -> None: + """Print a section header.""" + print(f"\n{'=' * 67}") + print(text) + print("=" * 67) + + +def print_step(text: str) -> None: + """Print a step indicator.""" + print(f"\n{text}") + + +def print_success(text: str) -> None: + """Print a success message.""" + print(f"✓ {text}") + + +def print_error(text: str) -> None: + """Print an error message.""" + print(f"✗ {text}", file=sys.stderr) + + +def print_troubleshooting_auth() -> None: + print("\nTroubleshooting tips:") + print(" • Ensure you have network connectivity to your Databricks workspace") + print(" • Try running 'databricks auth login' manually to see detailed errors") + print(" • Check that your workspace URL is correct") + print(" • If using a browser for OAuth, ensure popups are not blocked") + + +def print_troubleshooting_api() -> None: + print("\nTroubleshooting tips:") + print(" • Your authentication token may have expired - try 'databricks auth login' to refresh") + print(" • Verify your profile is valid with 'databricks auth profiles'") + print(" • Check network connectivity to your Databricks workspace") + + +def command_exists(cmd: str) -> bool: + """Check if a command exists in PATH.""" + return shutil.which(cmd) is not None + + +def run_command( + cmd: list[str], + capture_output: bool = True, + check: bool = True, + env: dict = None, + show_output: bool = False, +) -> subprocess.CompletedProcess: + """Run a command and return the result.""" + merged_env = {**os.environ, **(env or {})} + if show_output: + return subprocess.run(cmd, check=check, env=merged_env) + return subprocess.run( + cmd, capture_output=capture_output, text=True, check=check, env=merged_env + ) + + +def get_command_output(cmd: list[str], env: dict = None) -> str: + """Run a command and return its stdout.""" + result = run_command(cmd, env=env) + return result.stdout.strip() + + +def check_prerequisites() -> dict[str, bool]: + """Check which prerequisites are installed.""" + print_step("Checking prerequisites...") + + prereqs = { + "uv": command_exists("uv"), + "node": command_exists("node"), + "npm": command_exists("npm"), + "databricks": command_exists("databricks"), + } + + for name, installed in prereqs.items(): + if installed: + try: + if name == "uv": + version = get_command_output(["uv", "--version"]) + elif name == "node": + version = get_command_output(["node", "--version"]) + elif name == "npm": + version = get_command_output(["npm", "--version"]) + elif name == "databricks": + version = get_command_output(["databricks", "--version"]) + print_success(f"{name} is installed: {version}") + except Exception: + print_success(f"{name} is installed") + else: + print(f" {name} is not installed") + + return prereqs + + +def check_missing_prerequisites(prereqs: dict[str, bool]) -> list[str]: + """Return list of missing 
prerequisites with install instructions.""" + missing = [] + + if not prereqs["uv"]: + missing.append("uv - Install with: curl -LsSf https://astral.sh/uv/install.sh | sh") + + if not prereqs["node"] or not prereqs["npm"]: + missing.append("Node.js 20 - Install with: nvm install 20 (or download from nodejs.org)") + + if not prereqs["databricks"]: + if platform.system() == "Darwin": + missing.append("Databricks CLI - Install with: brew install databricks/tap/databricks") + else: + missing.append( + "Databricks CLI - Install with: curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh" + ) + + if missing: + missing.append( + "Note: These install commands are for Unix/macOS. For Windows, please visit the official documentation for each tool." + ) + + return missing + + +def check_node_version() -> str | None: + """Check if the installed Node.js version meets Vite's requirements. + + Vite requires Node.js >=20.19, >=22.12, or >=23. + Node 21.x is an odd-numbered release and not supported. + + Returns None if the version is OK, or an error string if not. + """ + if not command_exists("node"): + return None # Missing node is handled by check_missing_prerequisites + + try: + version_str = get_command_output(["node", "--version"]) + except Exception: + return None + + match = re.match(r"v(\d+)\.(\d+)\.(\d+)", version_str) + if not match: + return None + + major, minor = int(match.group(1)), int(match.group(2)) + + # Node 21.x is odd-numbered and not a Vite target + if major == 21: + return ( + f"Node.js {version_str} is not supported by Vite (odd-numbered release).\n" + " Please install Node.js 20.19+, 22.12+, or 23+.\n" + " Run: nvm install 22" + ) + + # Check supported version ranges + if major == 20 and minor >= 19: + return None + if major == 22 and minor >= 12: + return None + if major >= 23: + return None + + # Version is too old or unsupported + if major == 20: + return ( + f"Node.js {version_str} is too old for Vite (requires 20.19+).\n" + f" Your version: {version_str}\n" + " Run: nvm install 20 (to get latest 20.x)" + ) + if major == 22: + return ( + f"Node.js {version_str} is too old for Vite (requires 22.12+).\n" + f" Your version: {version_str}\n" + " Run: nvm install 22 (to get latest 22.x)" + ) + + if major < 20: + return ( + f"Node.js {version_str} is too old for Vite (requires 20.19+).\n" + f" Your version: {version_str}\n" + " Run: nvm install 22" + ) + + return ( + f"Node.js {version_str} is not supported by Vite.\n" + " Vite requires Node.js 20.19+, 22.12+, or 23+.\n" + " Run: nvm install 22" + ) + + +def setup_env_file() -> None: + """Copy .env.example to .env if it doesn't exist.""" + print_step("Setting up configuration files...") + + env_local = Path(".env") + env_example = Path(".env.example") + + if env_local.exists(): + print(" .env already exists, skipping copy...") + elif env_example.exists(): + shutil.copy(env_example, env_local) + print_success("Copied .env.example to .env") + else: + # Create a minimal .env + env_local.write_text( + "# Databricks configuration\n" + "DATABRICKS_CONFIG_PROFILE=DEFAULT\n" + "MLFLOW_EXPERIMENT_ID=\n" + 'MLFLOW_TRACKING_URI="databricks"\n' + 'MLFLOW_REGISTRY_URI="databricks-uc"\n' + ) + print_success("Created .env") + + +def update_env_file(key: str, value: str) -> None: + """Update or add a key-value pair in .env.""" + env_file = Path(".env") + + if not env_file.exists(): + env_file.write_text(f"{key}={value}\n") + return + + content = env_file.read_text() + + # Check if key exists (with or without quotes, 
with any value) + pattern = rf"^{re.escape(key)}=.*$" + if re.search(pattern, content, re.MULTILINE): + # Replace existing key + content = re.sub(pattern, f"{key}={value}", content, flags=re.MULTILINE) + else: + # Add new key + if not content.endswith("\n"): + content += "\n" + content += f"{key}={value}\n" + + env_file.write_text(content) + + +def get_databricks_profiles() -> list[dict]: + """Get list of existing Databricks profiles.""" + try: + result = run_command(["databricks", "auth", "profiles"], check=False) + if result.returncode != 0 or not result.stdout.strip(): + return [] + + lines = result.stdout.strip().split("\n") + if len(lines) <= 1: # Only header or empty + return [] + + # Parse the output - first line is header + profiles = [] + for line in lines[1:]: + if line.strip(): + # Profile name is the first column + parts = line.split() + if parts: + profiles.append( + { + "name": parts[0], + "line": line, + } + ) + + return profiles + except Exception: + return [] + + +def validate_profile(profile_name: str) -> bool: + """Test if a Databricks profile is authenticated.""" + try: + env = {"DATABRICKS_CONFIG_PROFILE": profile_name} + result = run_command( + ["databricks", "current-user", "me"], + check=False, + env=env, + ) + return result.returncode == 0 + except Exception: + return False + + +def authenticate_profile(profile_name: str, host: str = None) -> bool: + """Authenticate a Databricks profile.""" + print(f"\nAuthenticating profile '{profile_name}'...") + print("You will be prompted to log in to Databricks in your browser.\n") + + cmd = ["databricks", "auth", "login", "--profile", profile_name] + if host: + cmd.extend(["--host", host]) + + try: + # Run interactively so user can see browser prompt + result = subprocess.run(cmd) + return result.returncode == 0 + except Exception as e: + print_error(f"Authentication failed: {e}") + return False + + +def select_profile_interactive(profiles: list[dict]) -> str: + """Let user select a profile interactively.""" + print("\nFound existing Databricks profiles:\n") + + # Print header and profiles + for i, profile in enumerate(profiles, 1): + print(f" {i}) {profile['line']}") + + print() + + while True: + choice = input("Enter the number of the profile you want to use: ").strip() + if not choice: + print_error("Profile selection is required") + continue + + try: + index = int(choice) - 1 + if 0 <= index < len(profiles): + return profiles[index]["name"] + else: + print_error(f"Please choose a number between 1 and {len(profiles)}") + except ValueError: + print_error("Please enter a valid number") + + +def setup_databricks_auth(profile_arg: str = None, host_arg: str = None) -> str: + """Set up Databricks authentication and return the profile name.""" + print_step("Setting up Databricks authentication...") + + # If profile was specified via CLI, use it directly + if profile_arg: + profile_name = profile_arg + print(f"Using specified profile: {profile_name}") + else: + # Check for existing profiles + profiles = get_databricks_profiles() + + if profiles: + profile_name = select_profile_interactive(profiles) + print(f"\nSelected profile: {profile_name}") + else: + # No profiles exist - need to create one + profile_name = None + + # Validate or authenticate the profile + if profile_name: + if validate_profile(profile_name): + print_success(f"Successfully validated profile '{profile_name}'") + else: + print(f"Profile '{profile_name}' is not authenticated.") + if not authenticate_profile(profile_name): + print_error(f"Failed to authenticate 
profile '{profile_name}'") + print_troubleshooting_auth() + sys.exit(1) + print_success(f"Successfully authenticated profile '{profile_name}'") + else: + # Create new profile + print("No existing profiles found. Setting up Databricks authentication...") + + if host_arg: + host = host_arg + print(f"Using specified host: {host}") + else: + host = input( + "\nPlease enter your Databricks host URL\n(e.g., https://your-workspace.cloud.databricks.com): " + ).strip() + + if not host: + print_error("Databricks host is required") + sys.exit(1) + + profile_name = "DEFAULT" + if not authenticate_profile(profile_name, host): + print_error("Databricks authentication failed") + print_troubleshooting_auth() + sys.exit(1) + print_success(f"Successfully authenticated with Databricks") + + # Update .env with profile + update_env_file("DATABRICKS_CONFIG_PROFILE", profile_name) + update_env_file("MLFLOW_TRACKING_URI", f'"databricks://{profile_name}"') + print_success(f"Databricks profile '{profile_name}' saved to .env") + + return profile_name + + +def get_databricks_host(profile_name: str) -> str: + """Get the Databricks workspace host URL from the profile.""" + try: + result = run_command( + ["databricks", "auth", "env", "--profile", profile_name, "--output", "json"], + check=False, + ) + if result.returncode == 0: + env_data = json.loads(result.stdout) + env_vars = env_data.get("env", {}) + host = env_vars.get("DATABRICKS_HOST", "") + return host.rstrip("/") + except Exception: + pass + return "" + + +def get_databricks_username(profile_name: str) -> str: + """Get the current Databricks username.""" + try: + result = run_command( + ["databricks", "-p", profile_name, "current-user", "me", "--output", "json"] + ) + user_data = json.loads(result.stdout) + return user_data.get("userName", "") + except Exception as e: + print_error(f"Failed to get Databricks username: {e}") + print_troubleshooting_api() + sys.exit(1) + + +def create_mlflow_experiment(profile_name: str, username: str) -> tuple[str, str]: + """Create an MLflow experiment and return (name, id).""" + print_step("Creating MLflow experiment...") + + experiment_name = f"/Users/{username}/agents-on-apps" + + try: + # Try to create with default name + result = run_command( + [ + "databricks", + "-p", + profile_name, + "experiments", + "create-experiment", + experiment_name, + "--output", + "json", + ], + check=False, + ) + + if result.returncode == 0: + experiment_id = json.loads(result.stdout).get("experiment_id", "") + print_success(f"Created experiment '{experiment_name}' with ID: {experiment_id}") + return experiment_name, experiment_id + + # Name already exists, try with random suffix + print("Experiment name already exists, creating with random suffix...") + random_suffix = secrets.token_hex(4) + experiment_name = f"/Users/{username}/agents-on-apps-{random_suffix}" + + result = run_command( + [ + "databricks", + "-p", + profile_name, + "experiments", + "create-experiment", + experiment_name, + "--output", + "json", + ] + ) + experiment_id = json.loads(result.stdout).get("experiment_id", "") + print_success(f"Created experiment '{experiment_name}' with ID: {experiment_id}") + return experiment_name, experiment_id + + except Exception as e: + print_error(f"Failed to create MLflow experiment: {e}") + print_troubleshooting_api() + sys.exit(1) + + +def check_lakebase_required() -> bool: + """Check if databricks.yml has LAKEBASE_INSTANCE_NAME configured.""" + databricks_yml = Path("databricks.yml") + if not databricks_yml.exists(): + return False + + 
content = databricks_yml.read_text() + return "LAKEBASE_INSTANCE_NAME" in content + + +def get_env_value(key: str) -> str: + """Get a value from .env file.""" + env_file = Path(".env") + if not env_file.exists(): + return "" + + content = env_file.read_text() + pattern = rf"^{re.escape(key)}=(.*)$" + match = re.search(pattern, content, re.MULTILINE) + if match: + return match.group(1).strip().strip('"').strip("'") + return "" + + +def validate_lakebase_instance(profile_name: str, lakebase_name: str) -> dict | None: + """Validate that the Lakebase instance exists and user has access. + + Returns the instance info dict on success, None on failure. + """ + print(f"Validating Lakebase instance '{lakebase_name}'...") + + result = run_command( + [ + "databricks", + "-p", + profile_name, + "database", + "get-database-instance", + lakebase_name, + "--output", + "json", + ], + check=False, + ) + + if result.returncode == 0: + print_success(f"Lakebase instance '{lakebase_name}' validated") + return json.loads(result.stdout) + + # Check if database command is not recognized (old CLI version) + if 'unknown command "database" for "databricks"' in (result.stderr or ""): + print_error( + "The 'databricks database' command requires a newer version of the Databricks CLI." + ) + print(" Please upgrade: https://docs.databricks.com/dev-tools/cli/install.html") + return None + + error_msg = result.stderr.lower() if result.stderr else "" + if "not found" in error_msg: + print_error( + f"Lakebase instance '{lakebase_name}' not found. Please check the instance name." + ) + elif "permission" in error_msg or "forbidden" in error_msg or "unauthorized" in error_msg: + print_error(f"No permission to access Lakebase instance '{lakebase_name}'") + else: + print_error( + f"Failed to validate Lakebase instance: {result.stderr.strip() if result.stderr else 'Unknown error'}" + ) + return None + + +def setup_lakebase(profile_name: str, username: str, lakebase_arg: str = None) -> str: + """Set up Lakebase instance for memory features.""" + print_step("Setting up Lakebase instance for memory...") + + lakebase_name = None + + # If --lakebase was provided, use it directly + if lakebase_arg: + lakebase_name = lakebase_arg + print(f"Using provided Lakebase instance: {lakebase_name}") + else: + # Check if already set in .env + existing = get_env_value("LAKEBASE_INSTANCE_NAME") + if existing: + print(f"Found existing Lakebase instance in .env: {existing}") + new_value = input( + "Press Enter to keep this value, or enter a new instance name: " + ).strip() + lakebase_name = new_value if new_value else existing + else: + # Interactive mode - prompt for instance name + lakebase_name = input("Please enter your Lakebase instance name: ").strip() + + if not lakebase_name: + print_error("Lakebase instance name is required for memory features") + sys.exit(1) + + # Validate that the Lakebase instance exists and user has access + instance_info = validate_lakebase_instance(profile_name, lakebase_name) + if not instance_info: + sys.exit(1) + + # Update .env with the Lakebase instance name + update_env_file("LAKEBASE_INSTANCE_NAME", lakebase_name) + print_success(f"Lakebase instance name '{lakebase_name}' saved to .env") + + # Set up PostgreSQL connection environment variables + pg_host = instance_info.get("read_write_dns", "") + if pg_host: + update_env_file("PGHOST", pg_host) + print_success(f"PGHOST set to '{pg_host}'") + else: + print_error("Could not get read_write_dns from Lakebase instance") + + update_env_file("PGUSER", username) + 
print_success(f"PGUSER set to '{username}'") + + update_env_file("PGDATABASE", "databricks_postgres") + print_success("PGDATABASE set to 'databricks_postgres'") + + return lakebase_name + + +def update_databricks_yml_experiment(experiment_id: str) -> None: + """Update databricks.yml to set the experiment ID in the app resource.""" + yml_path = Path("databricks.yml") + if not yml_path.exists(): + return + + content = yml_path.read_text() + + # Set the experiment_id in the app's experiment resource + content = re.sub( + r'(experiment_id: )"[^"]*"', + f'\\1"{experiment_id}"', + content, + ) + + yml_path.write_text(content) + print_success("Updated databricks.yml with experiment ID") + + +def update_databricks_yml_lakebase(lakebase_name: str) -> None: + """Update databricks.yml to replace lakebase placeholder with actual instance name.""" + yml_path = Path("databricks.yml") + if not yml_path.exists(): + return + + content = yml_path.read_text() + if "" not in content: + return + + content = content.replace("", lakebase_name) + yml_path.write_text(content) + print_success("Updated databricks.yml with Lakebase instance name") + + +def main(): + parser = argparse.ArgumentParser( + description="Quickstart setup for Databricks agent development", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + uv run quickstart # Interactive setup + uv run quickstart --profile DEFAULT # Use existing profile (non-interactive) + uv run quickstart --host https://... # Set up new profile with host + uv run quickstart --lakebase my-db # Include Lakebase setup for memory + """, + ) + parser.add_argument( + "--profile", + help="Use specified Databricks profile (non-interactive)", + metavar="NAME", + ) + parser.add_argument( + "--host", + help="Databricks workspace URL (for initial setup)", + metavar="URL", + ) + parser.add_argument( + "--lakebase", + help="Lakebase instance name (for memory features)", + metavar="NAME", + ) + + args = parser.parse_args() + + try: + print_header("Agent on Apps - Quickstart Setup") + + # Step 1: Check prerequisites + prereqs = check_prerequisites() + missing = check_missing_prerequisites(prereqs) + + if missing: + print_step("Missing prerequisites:") + for item in missing: + print(f" • {item}") + print("\nPlease install the missing prerequisites and run this script again.") + sys.exit(1) + + # Check Node.js version meets Vite requirements + node_error = check_node_version() + if node_error: + print_error(f"Node.js version check failed:\n {node_error}") + sys.exit(1) + + # Step 2: Set up .env + setup_env_file() + + # Step 3: Databricks authentication + profile_name = setup_databricks_auth(args.profile, args.host) + + # Step 4: Get username and create MLflow experiment + print_step("Getting Databricks username...") + username = get_databricks_username(profile_name) + print(f"Username: {username}") + + experiment_name, experiment_id = create_mlflow_experiment(profile_name, username) + + # Step 5: Update .env with experiment ID + update_env_file("MLFLOW_EXPERIMENT_ID", experiment_id) + print_success("Updated .env with experiment ID") + + # Step 5b: Update databricks.yml to use literal experiment ID + update_databricks_yml_experiment(experiment_id) + + # Step 6: Lakebase setup (if needed for memory features) + lakebase_name = None + lakebase_required = args.lakebase or check_lakebase_required() + if lakebase_required: + lakebase_name = setup_lakebase(profile_name, username, args.lakebase) + update_databricks_yml_lakebase(lakebase_name) + + # Final summary + host 
= get_databricks_host(profile_name) + + print_header("Setup Complete!") + summary = f""" +✓ Prerequisites verified (uv, Node.js, Databricks CLI) +✓ Databricks authenticated with profile: {profile_name} +✓ Configuration files created (.env) + +✓ MLflow experiment created for tracing and evaluation: {experiment_name} +✓ Experiment ID: {experiment_id}""" + + if host and experiment_id: + summary += f"\n {host}/ml/experiments/{experiment_id}" + + if lakebase_name: + summary += f"\n\n✓ Lakebase instance: {lakebase_name}" + summary += "\n✓ PostgreSQL variables set (PGHOST, PGUSER, PGDATABASE)" + if host: + summary += f"\n {host}/lakebase/provisioned/{lakebase_name}" + + summary += "\nNext step: Run 'uv run start-app' to start the agent locally\n" + print(summary) + + except KeyboardInterrupt: + print("\n\nSetup cancelled.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/agent-supervisor-api/scripts/start_app.py b/agent-supervisor-api/scripts/start_app.py new file mode 100644 index 00000000..58625601 --- /dev/null +++ b/agent-supervisor-api/scripts/start_app.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +""" +Start script for running frontend and backend processes concurrently. + +Requirements: +1. Not reporting ready until BOTH frontend and backend processes are ready +2. Exiting as soon as EITHER process fails +3. Printing error logs if either process fails + +Usage: + start-app [OPTIONS] + +All options are passed through to the backend server (start-server). +See 'uv run start-server --help' for available options. +""" + +import argparse +import os +import re +import shutil +import socket +import subprocess +import sys +import threading +import time +from pathlib import Path + +from dotenv import load_dotenv + +# Readiness patterns +BACKEND_READY = [r"Uvicorn running on", r"Application startup complete", r"Started server process"] +FRONTEND_READY = [r"Server is running on http://localhost"] + + +def check_port_available(port: int) -> bool: + """Check if a port is available by attempting to bind to it.""" + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("localhost", port)) + return True + except OSError: + return False + + +class ProcessManager: + def __init__(self, port=8000, no_ui=False): + self.backend_process = None + self.frontend_process = None + self.backend_ready = False + self.frontend_ready = False + self.failed = threading.Event() + self.backend_log = None + self.frontend_log = None + self.port = port + self.no_ui = no_ui + + def check_ports(self): + """Check that required ports are available before starting processes.""" + backend_port = self.port + + errors = [] + if not check_port_available(backend_port): + errors.append( + f"Port {backend_port} (backend) is already in use.\n" + f" To free it: lsof -ti :{backend_port} | xargs kill -9" + ) + + if not self.no_ui: + frontend_port = int(os.environ.get("CHAT_APP_PORT", os.environ.get("PORT", "3000"))) + + if backend_port == frontend_port: + print( + f"ERROR: Backend and frontend are both configured to use port {backend_port}." 
+ ) + print(" Set CHAT_APP_PORT in .env to a different port (e.g., CHAT_APP_PORT=3000).") + sys.exit(1) + + if not check_port_available(frontend_port): + port_source = ( + "CHAT_APP_PORT" + if os.environ.get("CHAT_APP_PORT") + else "PORT" + if os.environ.get("PORT") + else "default" + ) + errors.append( + f"Port {frontend_port} (frontend, source: {port_source}) is already in use.\n" + f" To free it: lsof -ti :{frontend_port} | xargs kill -9\n" + f" Or set a different port: CHAT_APP_PORT= in .env" + ) + + if errors: + print("ERROR: Port(s) already in use:\n") + for error in errors: + print(f" {error}\n") + sys.exit(1) + + def monitor_process(self, process, name, log_file, patterns): + is_ready = False + try: + for line in iter(process.stdout.readline, ""): + if not line: + break + + line = line.rstrip() + log_file.write(line + "\n") + print(f"[{name}] {line}") + + # Check readiness + if not is_ready and any(re.search(p, line, re.IGNORECASE) for p in patterns): + is_ready = True + if name == "backend": + self.backend_ready = True + else: + self.frontend_ready = True + print(f"✓ {name.capitalize()} is ready!") + + if self.no_ui and self.backend_ready: + print("\n" + "=" * 50) + print("✓ Backend is ready! (running without UI)") + print(f"✓ API available at http://localhost:{self.port}") + print("=" * 50 + "\n") + elif self.backend_ready and self.frontend_ready: + print("\n" + "=" * 50) + print("✓ Both frontend and backend are ready!") + print(f"✓ Open the frontend at http://localhost:{self.port}") + print("=" * 50 + "\n") + + process.wait() + if process.returncode != 0: + self.failed.set() + + except Exception as e: + print(f"Error monitoring {name}: {e}") + self.failed.set() + + def clone_frontend_if_needed(self): + if Path("e2e-chatbot-app-next").exists(): + return True + + print("Cloning e2e-chatbot-app-next...") + for url in [ + "https://github.com/databricks/app-templates.git", + "git@github.com:databricks/app-templates.git", + ]: + try: + subprocess.run( + ["git", "clone", "--filter=blob:none", "--sparse", url, "temp-app-templates"], + check=True, + capture_output=True, + ) + break + except subprocess.CalledProcessError: + continue + else: + print("ERROR: Failed to clone repository.") + print( + "Manually download from: https://download-directory.github.io/?url=https://github.com/databricks/app-templates/tree/main/e2e-chatbot-app-next" + ) + return False + + subprocess.run( + ["git", "sparse-checkout", "set", "e2e-chatbot-app-next"], + cwd="temp-app-templates", + check=True, + ) + Path("temp-app-templates/e2e-chatbot-app-next").rename("e2e-chatbot-app-next") + shutil.rmtree("temp-app-templates", ignore_errors=True) + return True + + def start_process(self, cmd, name, log_file, patterns, cwd=None): + print(f"Starting {name}...") + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, cwd=cwd + ) + + thread = threading.Thread( + target=self.monitor_process, args=(process, name, log_file, patterns), daemon=True + ) + thread.start() + return process + + def print_logs(self, log_path): + print(f"\nLast 50 lines of {log_path}:") + print("-" * 40) + try: + lines = Path(log_path).read_text().splitlines() + print("\n".join(lines[-50:])) + except FileNotFoundError: + print(f"(no {log_path} found)") + print("-" * 40) + + def cleanup(self): + print("\n" + "=" * 42) + print("Shutting down..." 
if self.no_ui else "Shutting down both processes...") + print("=" * 42) + + for proc in [self.backend_process, self.frontend_process]: + if proc: + try: + proc.terminate() + proc.wait(timeout=5) + except (subprocess.TimeoutExpired, Exception): + proc.kill() + + if self.backend_log: + self.backend_log.close() + if self.frontend_log: + self.frontend_log.close() + + def run(self, backend_args=None): + load_dotenv(dotenv_path=".env", override=True) + if not os.environ.get("DATABRICKS_APP_NAME"): + self.check_ports() + + if not self.no_ui: + if not self.clone_frontend_if_needed(): + print("WARNING: Failed to clone frontend. Continuing with backend only.") + self.no_ui = True + else: + # Set API_PROXY environment variable for frontend to connect to backend + os.environ["API_PROXY"] = f"http://localhost:{self.port}/invocations" + + # Open log files + self.backend_log = open("backend.log", "w", buffering=1) + if not self.no_ui: + self.frontend_log = open("frontend.log", "w", buffering=1) + + try: + # Build backend command, passing through all arguments + backend_cmd = ["uv", "run", "start-server"] + if backend_args: + backend_cmd.extend(backend_args) + + # Start backend + self.backend_process = self.start_process( + backend_cmd, "backend", self.backend_log, BACKEND_READY + ) + + if not self.no_ui: + # Setup and start frontend + frontend_dir = Path("e2e-chatbot-app-next") + for cmd, desc in [("npm install", "install"), ("npm run build", "build")]: + print(f"Running npm {desc}...") + result = subprocess.run( + cmd.split(), cwd=frontend_dir, capture_output=True, text=True + ) + if result.returncode != 0: + print(f"npm {desc} failed: {result.stderr}") + return 1 + + self.frontend_process = self.start_process( + ["npm", "run", "start"], + "frontend", + self.frontend_log, + FRONTEND_READY, + cwd=frontend_dir, + ) + + print( + f"\nMonitoring processes (Backend PID: {self.backend_process.pid}, Frontend PID: {self.frontend_process.pid})\n" + ) + else: + print(f"\nMonitoring backend process (PID: {self.backend_process.pid})\n") + + # Wait for failure + while not self.failed.is_set(): + time.sleep(0.1) + if self.backend_process.poll() is not None: + self.failed.set() + break + if ( + not self.no_ui + and self.frontend_process + and self.frontend_process.poll() is not None + ): + self.failed.set() + break + + # Determine which failed + if self.no_ui or self.backend_process.poll() is not None: + failed_name = "backend" + failed_proc = self.backend_process + else: + failed_name = "frontend" + failed_proc = self.frontend_process + exit_code = failed_proc.returncode if failed_proc else 1 + + print( + f"\n{'=' * 42}\nERROR: {failed_name} process exited with code {exit_code}\n{'=' * 42}" + ) + self.print_logs("backend.log") + if not self.no_ui: + self.print_logs("frontend.log") + return exit_code + + except KeyboardInterrupt: + print("\nInterrupted") + return 0 + + finally: + self.cleanup() + + +def main(): + parser = argparse.ArgumentParser( + description="Start agent frontend and backend", + usage="%(prog)s [OPTIONS]\n\nAll options are passed through to start-server. 
" + "Use 'uv run start-server --help' for available options.", + ) + parser.add_argument( + "--no-ui", + action="store_true", + help="Run backend only, skip frontend UI", + ) + args, backend_args = parser.parse_known_args() + + # Extract port from backend_args if specified + port = 8000 + for i, arg in enumerate(backend_args): + if arg == "--port" and i + 1 < len(backend_args): + try: + port = int(backend_args[i + 1]) + except ValueError: + pass + break + + sys.exit(ProcessManager(port=port, no_ui=args.no_ui).run(backend_args)) + + +if __name__ == "__main__": + main() diff --git a/agent-supervisor-api/tests/__init__.py b/agent-supervisor-api/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/agent-supervisor-api/tests/test_agent.py b/agent-supervisor-api/tests/test_agent.py new file mode 100644 index 00000000..b5b9577b --- /dev/null +++ b/agent-supervisor-api/tests/test_agent.py @@ -0,0 +1,214 @@ +"""Tests for the agent-supervisor-api template. + +Unit tests run without credentials. Integration tests require setting +DATABRICKS_HOST and DATABRICKS_TOKEN (or a configured CLI profile). + +Integration test target: + Host: https://eng-ml-inference.staging.cloud.databricks.com + AI Gateway: https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1/responses + LiteSwap header: x-databricks-traffic-id: testenv://liteswap/mas-arv + Model: databricks-claude-opus-4-6 +""" +import os +from unittest.mock import MagicMock, patch + +import pytest +from mlflow.types.responses import ResponsesAgentRequest, ResponsesAgentResponse + + +# --- Unit tests (no credentials needed) --- + + +def test_module_imports(): + """Agent module imports cleanly without auth.""" + import agent_server.agent as agent + assert hasattr(agent, "MODEL") + assert hasattr(agent, "TOOLS") + assert hasattr(agent, "invoke_handler") + assert hasattr(agent, "stream_handler") + assert hasattr(agent, "_get_client") + + +def test_get_client_uses_mlflow_v1_base_url(): + """_get_client configures DatabricksOpenAI with /mlflow/v1 base URL, not /serving-endpoints.""" + from agent_server.agent import _get_client + + mock_wc = MagicMock() + mock_wc.config.host = "https://test.cloud.databricks.com" + + captured = {} + + def fake_databricks_openai(**kwargs): + captured.update(kwargs) + return MagicMock() + + with patch("agent_server.agent.WorkspaceClient", return_value=mock_wc), \ + patch("agent_server.agent.DatabricksOpenAI", side_effect=fake_databricks_openai): + _get_client() + + assert captured["base_url"] == "https://test.cloud.databricks.com/mlflow/v1" + assert captured["workspace_client"] is mock_wc + + +def test_tools_structure(): + """TOOLS list has correct structure.""" + from agent_server.agent import TOOLS + assert len(TOOLS) >= 1 + tool = TOOLS[0] + assert "type" in tool + assert tool["type"] in ("uc_function", "genie", "agent_endpoint", "mcp") + + +def test_model_is_string(): + from agent_server.agent import MODEL + assert isinstance(MODEL, str) + assert len(MODEL) > 0 + + +def test_get_session_id_from_conversation_id(): + from agent_server.utils import get_session_id + req = MagicMock(spec=ResponsesAgentRequest) + req.context = MagicMock() + req.context.conversation_id = "conv-123" + assert get_session_id(req) == "conv-123" + + +def test_get_session_id_from_custom_inputs(): + from agent_server.utils import get_session_id + req = MagicMock(spec=ResponsesAgentRequest) + req.context = None + req.custom_inputs = {"session_id": "sess-456"} + assert get_session_id(req) == "sess-456" + + +def 
test_get_session_id_returns_none(): + from agent_server.utils import get_session_id + req = MagicMock(spec=ResponsesAgentRequest) + req.context = None + req.custom_inputs = None + assert get_session_id(req) is None + + +def test_invoke_handler_calls_responses_create(): + """invoke_handler calls client.responses.create with correct params.""" + from agent_server.agent import MODEL, TOOLS + + mock_output = [{"type": "message", "id": "msg_001", "role": "assistant", "content": [{"type": "output_text", "text": "hi"}]}] + mock_response = MagicMock() + mock_response.output = mock_output + + mock_client = MagicMock() + mock_client.responses.create.return_value = mock_response + + with patch("agent_server.agent._get_client", return_value=mock_client): + req = MagicMock(spec=ResponsesAgentRequest) + req.context = None + req.custom_inputs = None + req.input = [MagicMock()] + req.input[0].model_dump.return_value = {"type": "message", "role": "user", "content": "hi"} + + from agent_server.agent import invoke_handler + result = invoke_handler(req) + + mock_client.responses.create.assert_called_once_with( + model=MODEL, + input=[{"type": "message", "role": "user", "content": "hi"}], + tools=TOOLS, + stream=False, + ) + assert isinstance(result, ResponsesAgentResponse) + assert len(result.output) == 1 + assert result.output[0].id == "msg_001" + + +def test_stream_handler_calls_responses_create_streaming(): + """stream_handler calls client.responses.create with stream=True.""" + from agent_server.agent import MODEL, TOOLS + + mock_client = MagicMock() + mock_client.responses.create.return_value = iter([]) + + with patch("agent_server.agent._get_client", return_value=mock_client): + req = MagicMock(spec=ResponsesAgentRequest) + req.context = None + req.custom_inputs = None + req.input = [MagicMock()] + req.input[0].model_dump.return_value = {"type": "message", "role": "user", "content": "hi"} + + from agent_server.agent import stream_handler + stream_handler(req) # Returns the iterator from client + + mock_client.responses.create.assert_called_once_with( + model=MODEL, + input=[{"type": "message", "role": "user", "content": "hi"}], + tools=TOOLS, + stream=True, + ) + + +# --- Integration tests (require credentials for eng-ml-inference staging) --- + + +INTEGRATION_REASON = ( + "Integration test requires DATABRICKS_TOKEN and DATABRICKS_HOST for " + "eng-ml-inference staging (workspace 1653573648247579). " + "Set ENG_ML_INFERENCE_TOKEN to enable." 
+) + + +@pytest.mark.skipif( + not os.environ.get("ENG_ML_INFERENCE_TOKEN"), + reason=INTEGRATION_REASON, +) +def test_supervisor_api_basic_call(): + """End-to-end: call the Supervisor API with a simple prompt (no tools).""" + import openai + + token = os.environ["ENG_ML_INFERENCE_TOKEN"] + client = openai.OpenAI( + base_url="https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1", + api_key=token, + default_headers={"x-databricks-traffic-id": "testenv://liteswap/mas-arv"}, + ) + response = client.responses.create( + model="databricks-claude-opus-4-6", + input=[{"type": "message", "role": "user", "content": "Reply with just the word 'OK'."}], + ) + assert response.output_text.strip() != "" + + +@pytest.mark.skipif( + not os.environ.get("ENG_ML_INFERENCE_TOKEN"), + reason=INTEGRATION_REASON, +) +def test_supervisor_api_with_genie_tool(): + """End-to-end: call Supervisor API with Genie tool (the NYC taxi example).""" + import openai + + token = os.environ["ENG_ML_INFERENCE_TOKEN"] + client = openai.OpenAI( + base_url="https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1", + api_key=token, + default_headers={"x-databricks-traffic-id": "testenv://liteswap/mas-arv"}, + ) + response = client.responses.create( + model="databricks-claude-opus-4-6", + input=[ + { + "type": "message", + "role": "user", + "content": "What zipcodes do the taxis operate in?", + } + ], + tools=[ + { + "type": "genie", + "genie": { + "name": "nyc-taxi-space", + "description": "Information about NYC Taxi spaces", + "space_id": "01f07892cf3118edad0a4054bcd25122", + }, + } + ], + ) + assert response.output_text.strip() != "" From a9b247c80bdcccb91c3f5c6666571a12f301f649 Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Tue, 10 Mar 2026 22:19:09 -0700 Subject: [PATCH 2/6] fix: correct AI Gateway URL derivation and output serialization - Derive AI Gateway base URL from workspace ID + domain (not workspace host) e.g. https://.ai-gateway./mlflow/v1 - Add liteswap staging header via extra_headers per-call - Fix output serialization: call .model_dump() on each response.output item - Swap example tool to Genie (NYC taxi space) to demonstrate hosted tools - Update MODEL to databricks-claude-opus-4-6 - Add unit tests for _ai_gateway_base_url, _get_client, invoke/stream handlers Co-Authored-By: Claude Sonnet 4.6 --- agent-supervisor-api/README.md | 19 ++++++---- agent-supervisor-api/agent_server/agent.py | 40 +++++++++++++++------ agent-supervisor-api/tests/test_agent.py | 42 ++++++++++++++++++---- 3 files changed, 77 insertions(+), 24 deletions(-) diff --git a/agent-supervisor-api/README.md b/agent-supervisor-api/README.md index 058b8c3a..2dfc1322 100644 --- a/agent-supervisor-api/README.md +++ b/agent-supervisor-api/README.md @@ -67,15 +67,20 @@ Edit the `TOOLS` list in `agent_server/agent.py`. Supported tool types: `uc_func ## How the client is configured -`DatabricksOpenAI` defaults to `{host}/serving-endpoints` as its base URL. The Supervisor API is a distinct endpoint served by AI Gateway at `{host}/mlflow/v1/responses`. The `_get_client()` helper in `agent_server/agent.py` overrides the base URL accordingly: +`DatabricksOpenAI` defaults to `{host}/serving-endpoints` as its base URL. 
The Supervisor API is served by AI Gateway at a **dedicated subdomain**, not the workspace host: + +``` +https://<workspace-id>.ai-gateway.<domain>/mlflow/v1/responses +``` + +The `_ai_gateway_base_url()` helper in `agent_server/agent.py` derives this URL automatically from the workspace host and ID: ```python -def _get_client() -> DatabricksOpenAI: - wc = WorkspaceClient() - return DatabricksOpenAI( - workspace_client=wc, - base_url=f"{wc.config.host}/mlflow/v1", - ) +def _ai_gateway_base_url(wc: WorkspaceClient) -> str: + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + return f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" ``` Authentication is handled automatically by the `WorkspaceClient` using your configured Databricks CLI credentials. diff --git a/agent-supervisor-api/agent_server/agent.py b/agent-supervisor-api/agent_server/agent.py index 4844c930..eb5eacfa 100644 --- a/agent-supervisor-api/agent_server/agent.py +++ b/agent-supervisor-api/agent_server/agent.py @@ -1,4 +1,5 @@ import logging +import re from typing import AsyncGenerator import mlflow @@ -18,30 +19,45 @@ # Model name controls which AI provider runs the agent loop. # Swap to any Databricks-hosted model without changing your tool or agent code. -MODEL = "databricks-claude-sonnet-4-5" +MODEL = "databricks-claude-opus-4-6" # Hosted tools — the Supervisor API runs the tool-selection and synthesis loop # server-side. Add or remove tool definitions here to change agent behavior. TOOLS = [ { - "type": "uc_function", - "uc_function": { - "name": "system.ai.python_exec", - "name_alias": "python_exec", - "description": "Execute Python code to perform calculations, data analysis, or string processing.", + "type": "genie", + "genie": { + "name": "nyc-taxi-space", + "description": "Information about NYC Taxi spaces", + "space_id": "01f07892cf3118edad0a4054bcd25122", }, } ] +def _ai_gateway_base_url(wc: WorkspaceClient) -> str: + """Derive the AI Gateway base URL from the workspace host and workspace ID. + + The Supervisor API is served by AI Gateway at a dedicated subdomain: + https://<workspace-id>.ai-gateway.<domain>/mlflow/v1 + + This is distinct from the workspace host (used for /serving-endpoints). + """ + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + # Strip the first subdomain and replace with .ai-gateway + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + return f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + + +_EXTRA_HEADERS = {"x-databricks-traffic-id": "testenv://liteswap/mas-arv"} + + +def _get_client() -> DatabricksOpenAI: - # The Supervisor API is served at /mlflow/v1/responses on the AI Gateway, - # not at /serving-endpoints (the default for DatabricksOpenAI). Pass the - # base_url explicitly so responses.create() hits the right endpoint.
wc = WorkspaceClient() return DatabricksOpenAI( workspace_client=wc, - base_url=f"{wc.config.host}/mlflow/v1", + base_url=_ai_gateway_base_url(wc), ) @@ -54,8 +70,9 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: input=[i.model_dump() for i in request.input], tools=TOOLS, stream=False, + extra_headers=_EXTRA_HEADERS, ) - return ResponsesAgentResponse(output=response.output) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) @stream() @@ -69,4 +86,5 @@ def stream_handler( input=[i.model_dump() for i in request.input], tools=TOOLS, stream=True, + extra_headers=_EXTRA_HEADERS, ) diff --git a/agent-supervisor-api/tests/test_agent.py b/agent-supervisor-api/tests/test_agent.py index b5b9577b..eeafa30e 100644 --- a/agent-supervisor-api/tests/test_agent.py +++ b/agent-supervisor-api/tests/test_agent.py @@ -29,12 +29,37 @@ def test_module_imports(): assert hasattr(agent, "_get_client") -def test_get_client_uses_mlflow_v1_base_url(): - """_get_client configures DatabricksOpenAI with /mlflow/v1 base URL, not /serving-endpoints.""" +def test_ai_gateway_base_url(): + """_ai_gateway_base_url derives the AI Gateway URL from workspace host + workspace ID.""" + from agent_server.agent import _ai_gateway_base_url + + mock_wc = MagicMock() + mock_wc.config.host = "https://my-workspace.cloud.databricks.com" + mock_wc.get_workspace_id.return_value = 1234567890 + + url = _ai_gateway_base_url(mock_wc) + assert url == "https://1234567890.ai-gateway.cloud.databricks.com/mlflow/v1" + + +def test_ai_gateway_base_url_staging(): + """Works for staging workspaces too.""" + from agent_server.agent import _ai_gateway_base_url + + mock_wc = MagicMock() + mock_wc.config.host = "https://eng-ml-inference.staging.cloud.databricks.com" + mock_wc.get_workspace_id.return_value = 1653573648247579 + + url = _ai_gateway_base_url(mock_wc) + assert url == "https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1" + + +def test_get_client_uses_ai_gateway_url(): + """_get_client configures DatabricksOpenAI with the AI Gateway base URL.""" from agent_server.agent import _get_client mock_wc = MagicMock() - mock_wc.config.host = "https://test.cloud.databricks.com" + mock_wc.config.host = "https://my-workspace.cloud.databricks.com" + mock_wc.get_workspace_id.return_value = 1234567890 captured = {} @@ -46,7 +71,7 @@ def fake_databricks_openai(**kwargs): patch("agent_server.agent.DatabricksOpenAI", side_effect=fake_databricks_openai): _get_client() - assert captured["base_url"] == "https://test.cloud.databricks.com/mlflow/v1" + assert captured["base_url"] == "https://1234567890.ai-gateway.cloud.databricks.com/mlflow/v1" assert captured["workspace_client"] is mock_wc @@ -93,9 +118,10 @@ def test_invoke_handler_calls_responses_create(): """invoke_handler calls client.responses.create with correct params.""" from agent_server.agent import MODEL, TOOLS - mock_output = [{"type": "message", "id": "msg_001", "role": "assistant", "content": [{"type": "output_text", "text": "hi"}]}] + mock_item = MagicMock() + mock_item.model_dump.return_value = {"type": "message", "id": "msg_001", "role": "assistant", "content": [{"type": "output_text", "text": "hi"}]} mock_response = MagicMock() - mock_response.output = mock_output + mock_response.output = [mock_item] mock_client = MagicMock() mock_client.responses.create.return_value = mock_response @@ -110,11 +136,13 @@ def test_invoke_handler_calls_responses_create(): from agent_server.agent import invoke_handler result = 
invoke_handler(req) + from agent_server.agent import _EXTRA_HEADERS mock_client.responses.create.assert_called_once_with( model=MODEL, input=[{"type": "message", "role": "user", "content": "hi"}], tools=TOOLS, stream=False, + extra_headers=_EXTRA_HEADERS, ) assert isinstance(result, ResponsesAgentResponse) assert len(result.output) == 1 @@ -138,11 +166,13 @@ def test_stream_handler_calls_responses_create_streaming(): from agent_server.agent import stream_handler stream_handler(req) # Returns the iterator from client + from agent_server.agent import _EXTRA_HEADERS mock_client.responses.create.assert_called_once_with( model=MODEL, input=[{"type": "message", "role": "user", "content": "hi"}], tools=TOOLS, stream=True, + extra_headers=_EXTRA_HEADERS, ) From 0220e1a98d40beb5345565c441689823f5a645f1 Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Sun, 15 Mar 2026 22:22:32 -0700 Subject: [PATCH 3/6] feat: add use-supervisor-api skill Adds a skill that guides users to build agents using the Databricks Supervisor API (hosted tools) instead of a client-side agent loop. Syncs the skill to all existing templates. Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/use-supervisor-api/SKILL.md | 173 ++++ .gitignore | 1 + .scripts/sync-skills.py | 3 + .scripts/templates.py | 4 - .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-langgraph-long-term-memory/.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-langgraph-short-term-memory/.gitignore | 3 +- .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-langgraph/.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-migration-from-model-serving/.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-non-conversational/.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ .../.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-openai-agents-sdk-multiagent/.gitignore | 1 + .../skills/use-supervisor-api/SKILL.md | 173 ++++ agent-openai-agents-sdk/.gitignore | 1 + .../.claude/skills/add-tools/SKILL.md | 84 -- .../add-tools/examples/custom-mcp-server.md | 58 -- .../skills/add-tools/examples/experiment.yaml | 8 - .../add-tools/examples/genie-space.yaml | 9 - .../add-tools/examples/serving-endpoint.yaml | 7 - .../add-tools/examples/sql-warehouse.yaml | 7 - .../add-tools/examples/uc-connection.yaml | 9 - .../add-tools/examples/uc-function.yaml | 9 - .../add-tools/examples/vector-search.yaml | 9 - .../.claude/skills/agent-memory/SKILL.md | 176 ---- .../.claude/skills/deploy/SKILL.md | 232 ----- .../.claude/skills/discover-tools/SKILL.md | 47 - .../.claude/skills/lakebase-setup/SKILL.md | 392 ------- .../migrate-from-model-serving/SKILL.md | 965 ------------------ .../.claude/skills/modify-agent/SKILL.md | 147 --- .../.claude/skills/quickstart/SKILL.md | 83 -- .../.claude/skills/run-locally/SKILL.md | 90 -- agent-supervisor-api/AGENTS.md | 115 --- agent-supervisor-api/CLAUDE.md | 1 - agent-supervisor-api/README.md | 106 -- agent-supervisor-api/agent_server/__init__.py | 0 agent-supervisor-api/agent_server/agent.py | 90 -- .../agent_server/evaluate_agent.py | 100 -- .../agent_server/start_server.py | 17 - agent-supervisor-api/agent_server/utils.py | 9 - agent-supervisor-api/app.yaml | 16 - agent-supervisor-api/databricks.yml | 51 - agent-supervisor-api/pyproject.toml | 36 - agent-supervisor-api/requirements.txt | 1 - agent-supervisor-api/scripts/__init__.py | 0 .../scripts/discover_tools.py | 432 -------- agent-supervisor-api/scripts/quickstart.py | 768 
-------------- agent-supervisor-api/scripts/start_app.py | 332 ------ agent-supervisor-api/tests/__init__.py | 0 agent-supervisor-api/tests/test_agent.py | 244 ----- 55 files changed, 1570 insertions(+), 4655 deletions(-) create mode 100644 .claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md create mode 100644 agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml delete mode 100644 agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml delete mode 100644 agent-supervisor-api/.claude/skills/agent-memory/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/deploy/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/discover-tools/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/modify-agent/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/quickstart/SKILL.md delete mode 100644 agent-supervisor-api/.claude/skills/run-locally/SKILL.md delete mode 100644 agent-supervisor-api/AGENTS.md delete mode 100644 agent-supervisor-api/CLAUDE.md delete mode 100644 agent-supervisor-api/README.md delete mode 100644 agent-supervisor-api/agent_server/__init__.py delete mode 100644 agent-supervisor-api/agent_server/agent.py delete mode 100644 agent-supervisor-api/agent_server/evaluate_agent.py delete mode 100644 agent-supervisor-api/agent_server/start_server.py delete mode 100644 agent-supervisor-api/agent_server/utils.py delete mode 100644 agent-supervisor-api/app.yaml delete mode 100644 agent-supervisor-api/databricks.yml delete mode 100644 agent-supervisor-api/pyproject.toml delete mode 100644 agent-supervisor-api/requirements.txt delete mode 100644 agent-supervisor-api/scripts/__init__.py delete mode 100755 agent-supervisor-api/scripts/discover_tools.py delete mode 100644 agent-supervisor-api/scripts/quickstart.py delete mode 100644 agent-supervisor-api/scripts/start_app.py delete mode 100644 agent-supervisor-api/tests/__init__.py delete mode 100644 agent-supervisor-api/tests/test_agent.py diff --git 
a/.claude/skills/use-supervisor-api/SKILL.md b/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..5f64580f --- /dev/null +++ b/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. + +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. + +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run {{BUNDLE_NAME}} # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. 
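If you would rather keep the URL derivation out of `_get_client`, it can be factored into a small helper. The sketch below follows the same host-plus-workspace-ID scheme shown in `_get_client` above and matches the `_ai_gateway_base_url` helper exercised by the unit tests earlier in this patch; treat it as a minimal illustration, since the actual implementation in `agent_server/agent.py` may differ in details such as error handling.

```python
import re

from databricks.sdk import WorkspaceClient


def _ai_gateway_base_url(wc: WorkspaceClient) -> str:
    """Derive the AI Gateway base URL from the workspace host and workspace ID.

    Example: https://my-workspace.cloud.databricks.com with workspace ID 1234567890
    becomes https://1234567890.ai-gateway.cloud.databricks.com/mlflow/v1
    """
    host = wc.config.host  # e.g. https://my-workspace.cloud.databricks.com
    workspace_id = wc.get_workspace_id()
    # Keep everything after the first hostname label, so the same logic
    # works for both production and staging workspace domains.
    domain = re.match(r"https://[^.]+\.(.+)", host).group(1)
    return f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1"
```

Keeping the derivation in its own function also lets tests stub only `wc.config.host` and `wc.get_workspace_id()` on a `MagicMock`, as the unit tests above do, instead of constructing a real workspace client.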
diff --git a/.gitignore b/.gitignore index 8ee7fb7f..6e6d78be 100644 --- a/.gitignore +++ b/.gitignore @@ -189,6 +189,7 @@ mlflow.db !.claude/skills/agent-langgraph-memory/ !.claude/skills/agent-openai-memory/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ !.claude/skills/enable-feedback/ !.claude/AGENTS.md !.claude/CLAUDE.md \ No newline at end of file diff --git a/.scripts/sync-skills.py b/.scripts/sync-skills.py index d5fc82fd..d0f55cb4 100755 --- a/.scripts/sync-skills.py +++ b/.scripts/sync-skills.py @@ -55,6 +55,9 @@ def sync_template(template: str, config: dict): # Deploy skill (with substitution) copy_skill(SOURCE / "deploy", dest / "deploy", subs) + # Supervisor API skill (with substitution for bundle name in deploy command) + copy_skill(SOURCE / "use-supervisor-api", dest / "use-supervisor-api", subs) + # SDK-specific skills (with substitution for bundle name references) if isinstance(sdk, list): # Multiple SDKs: copy skills for each, keeping SDK suffix in name diff --git a/.scripts/templates.py b/.scripts/templates.py index 77faeacd..edf7f549 100644 --- a/.scripts/templates.py +++ b/.scripts/templates.py @@ -38,8 +38,4 @@ "sdk": ["langgraph", "openai"], "bundle_name": "agent_migration", }, - "agent-supervisor-api": { - "sdk": "openai", - "bundle_name": "agent_supervisor_api", - }, } diff --git a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..6c5f5686 --- /dev/null +++ b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. 
+ +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_langgraph_long_term_memory # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-langgraph-long-term-memory/.gitignore b/agent-langgraph-long-term-memory/.gitignore index ec2a577a..3a1fbcce 100644 --- a/agent-langgraph-long-term-memory/.gitignore +++ b/agent-langgraph-long-term-memory/.gitignore @@ -218,3 +218,4 @@ sketch !.claude/skills/lakebase-setup/ !.claude/skills/agent-memory/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ diff --git a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..60297de7 --- /dev/null +++ b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. 
+ +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_langgraph_short_term_memory # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-langgraph-short-term-memory/.gitignore b/agent-langgraph-short-term-memory/.gitignore index 1c494c4c..a698549a 100644 --- a/agent-langgraph-short-term-memory/.gitignore +++ b/agent-langgraph-short-term-memory/.gitignore @@ -217,4 +217,5 @@ sketch !.claude/skills/modify-agent/ !.claude/skills/lakebase-setup/ !.claude/skills/agent-memory/ -!.claude/skills/migrate-from-model-serving/ \ No newline at end of file +!.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ \ No newline at end of file diff --git a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..375e3da3 --- /dev/null +++ b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. 
Run `uv run discover-tools` to find available resources in your workspace. + +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_langgraph # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-langgraph/.gitignore b/agent-langgraph/.gitignore index 8bc4e76b..16a2f70d 100644 --- a/agent-langgraph/.gitignore +++ b/agent-langgraph/.gitignore @@ -217,6 +217,7 @@ sketch !.claude/skills/lakebase-setup/ !.claude/skills/agent-memory/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ **/.env **/.env.local \ No newline at end of file diff --git a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..911cd96f --- /dev/null +++ b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. 
+ +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_migration # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-migration-from-model-serving/.gitignore b/agent-migration-from-model-serving/.gitignore index 9bd156cf..d6c27b9b 100644 --- a/agent-migration-from-model-serving/.gitignore +++ b/agent-migration-from-model-serving/.gitignore @@ -219,6 +219,7 @@ sketch !.claude/skills/lakebase-setup/ !.claude/skills/agent-memory/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ **/.env **/.env.local \ No newline at end of file diff --git a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..7d5be0ad --- /dev/null +++ b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. 
+ +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_non_conversational # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-non-conversational/.gitignore b/agent-non-conversational/.gitignore index 8bc4e76b..16a2f70d 100644 --- a/agent-non-conversational/.gitignore +++ b/agent-non-conversational/.gitignore @@ -217,6 +217,7 @@ sketch !.claude/skills/lakebase-setup/ !.claude/skills/agent-memory/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ **/.env **/.env.local \ No newline at end of file diff --git a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..f18ed94f --- /dev/null +++ b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. 
Run `uv run discover-tools` to find available resources in your workspace. + +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_long_running_agent # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk-long-running-agent/.gitignore b/agent-openai-agents-sdk-long-running-agent/.gitignore index 9f7d0756..cc094a88 100644 --- a/agent-openai-agents-sdk-long-running-agent/.gitignore +++ b/agent-openai-agents-sdk-long-running-agent/.gitignore @@ -215,6 +215,7 @@ sketch !.claude/skills/run-locally/ !.claude/skills/modify-agent/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ !.claude/skills/agent-memory/ !.claude/skills/lakebase-setup/ diff --git a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..5ba51cb7 --- /dev/null +++ b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. 
+ +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. + +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_multiagent # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk-multiagent/.gitignore b/agent-openai-agents-sdk-multiagent/.gitignore index 1607735d..65539cd8 100644 --- a/agent-openai-agents-sdk-multiagent/.gitignore +++ b/agent-openai-agents-sdk-multiagent/.gitignore @@ -215,6 +215,7 @@ sketch !.claude/skills/run-locally/ !.claude/skills/modify-agent/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ **/.env **/.env.local diff --git a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md new file mode 100644 index 00000000..6927175e --- /dev/null +++ b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md @@ -0,0 +1,173 @@ +--- +name: use-supervisor-api +description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools." +--- + +# Use the Databricks Supervisor API + +> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. + +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. + +## When to Use + +Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. + +**Limitations (Beta):** +- Usage tracking is not supported +- Cannot mix hosted tools with client-side function tools in the same request +- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed + +## Step 1: Install `databricks-openai` + +Add to `pyproject.toml` if not already present: + +```toml +[project] +dependencies = [ + ... + "databricks-openai>=0.9.0", + "databricks-sdk>=0.55.0", +] +``` + +Then run `uv sync`. + +## Step 2: Declare Hosted Tools + +Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace. 
+ +```python +TOOLS = [ + # Genie space — natural language queries over structured data + { + "type": "genie", + "genie": { + "name": "my-genie-space", + "description": "Query sales data using natural language", + "space_id": "", + }, + }, + # UC function — SQL or Python UDF + { + "type": "uc_function", + "uc_function": { + "name": "..", + "name_alias": "my_function", + "description": "Executes a custom UC function", + }, + }, + # Agent endpoint — delegates to another agent + { + "type": "agent_endpoint", + "agent_endpoint": { + "name": "my-sub-agent", + "description": "A specialized sub-agent", + "endpoint_name": "", + }, + }, + # MCP server via UC connection + { + "type": "mcp", + "mcp": { + "name": "my-mcp-server", + "description": "An external MCP server", + "connection_name": "", + }, + }, +] +``` + +## Step 3: Update `agent_server/agent.py` + +Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. + +```python +import re + +import mlflow +from databricks.sdk import WorkspaceClient +from databricks_openai import DatabricksOpenAI +from mlflow.genai.agent_server import invoke, stream +from mlflow.types.responses import ( + ResponsesAgentRequest, + ResponsesAgentResponse, +) + +mlflow.openai.autolog() + +MODEL = "databricks-claude-sonnet-4-5" +TOOLS = [...] # From Step 2 + + +def _get_client() -> DatabricksOpenAI: + """Create a DatabricksOpenAI client pointed at the AI Gateway.""" + wc = WorkspaceClient() + host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com + workspace_id = wc.get_workspace_id() + domain = re.match(r"https://[^.]+\.(.+)", host).group(1) + base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" + return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + + +@invoke() +def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + response = _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=False, + ) + return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) + + +@stream() +def stream_handler(request: ResponsesAgentRequest): + mlflow.update_current_trace( + metadata={"mlflow.trace.session": request.context.conversation_id} + ) + return _get_client().responses.create( + model=MODEL, + input=[i.model_dump() for i in request.input], + tools=TOOLS, + stream=True, + ) +``` + +## Step 4: Grant Permissions in `databricks.yml` + +For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. 
+ +| Tool type | Resource to grant | +|-----------|-------------------| +| `genie` | `genie_space` with `CAN_RUN` | +| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | + +Also grant `CAN_QUERY` on the `MODEL` serving endpoint: + +```yaml +- name: 'model-endpoint' + serving_endpoint: + name: 'databricks-claude-sonnet-4-5' + permission: 'CAN_QUERY' +``` + +## Step 5: Test and Deploy + +```bash +uv run start-app # Test locally +databricks bundle deploy && databricks bundle run agent_openai_agents_sdk # Deploy +``` + +## Troubleshooting + +**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. + +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). + +**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk/.gitignore b/agent-openai-agents-sdk/.gitignore index 1607735d..65539cd8 100644 --- a/agent-openai-agents-sdk/.gitignore +++ b/agent-openai-agents-sdk/.gitignore @@ -215,6 +215,7 @@ sketch !.claude/skills/run-locally/ !.claude/skills/modify-agent/ !.claude/skills/migrate-from-model-serving/ +!.claude/skills/use-supervisor-api/ **/.env **/.env.local diff --git a/agent-supervisor-api/.claude/skills/add-tools/SKILL.md b/agent-supervisor-api/.claude/skills/add-tools/SKILL.md deleted file mode 100644 index e07b5279..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/SKILL.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -name: add-tools -description: "Add tools to your agent and grant required permissions in databricks.yml. Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources." ---- - -# Add Tools & Grant Permissions - -> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks --profile ` - -**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.** - -Without this, you'll get permission errors when the agent tries to use the resource. 
- -## Workflow - -**Step 1:** Add MCP server in `agent_server/agent.py`: -```python -from databricks_openai.agents import McpServer - -genie_server = McpServer( - url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef", - name="my genie space", -) - -agent = Agent( - name="my agent", - model="databricks-claude-3-7-sonnet", - mcp_servers=[genie_server], -) -``` - -**Step 2:** Grant access in `databricks.yml`: -```yaml -resources: - apps: - agent_supervisor_api: - resources: - - name: 'my_genie_space' - genie_space: - name: 'My Genie Space' - space_id: '01234567-89ab-cdef' - permission: 'CAN_RUN' -``` - -**Step 3:** Deploy with `databricks bundle deploy` (see **deploy** skill) - -## Resource Type Examples - -See the `examples/` directory for complete YAML snippets: - -| File | Resource Type | When to Use | -|------|--------------|-------------| -| `uc-function.yaml` | Unity Catalog function | UC functions | -| `uc-connection.yaml` | UC connection | External MCP servers | -| `vector-search.yaml` | Vector search index | RAG applications | -| `sql-warehouse.yaml` | SQL warehouse | SQL execution | -| `serving-endpoint.yaml` | Model serving endpoint | Model inference | -| `genie-space.yaml` | Genie space | Natural language data | -| `experiment.yaml` | MLflow experiment | Tracing (already configured) | -| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` | - -## Custom MCP Servers (Databricks Apps) - -Apps are **not yet supported** as resource dependencies in `databricks.yml`. Manual permission grant required: - -**Step 1:** Get your agent app's service principal: -```bash -databricks apps get --output json | jq -r '.service_principal_name' -``` - -**Step 2:** Grant permission on the MCP server app: -```bash -databricks apps update-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' -``` - -See `examples/custom-mcp-server.md` for detailed steps. - -## Important Notes - -- **MLflow experiment**: Already configured in template, no action needed -- **Multiple resources**: Add multiple entries under `resources:` list -- **Permission types vary**: Each resource type has specific permission values -- **Deploy after changes**: Run `databricks bundle deploy` after modifying `databricks.yml` diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md b/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md deleted file mode 100644 index 86b45858..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/custom-mcp-server.md +++ /dev/null @@ -1,58 +0,0 @@ -# Custom MCP Server (Databricks App) - -Custom MCP servers are Databricks Apps with names starting with `mcp-*`. - -**Apps are not yet supported as resource dependencies in `databricks.yml`**, so manual permission grant is required. - -## Steps - -### 1. Add MCP server in `agent_server/agent.py` - -```python -from databricks_openai.agents import McpServer - -custom_mcp = McpServer( - url="https://mcp-my-server.cloud.databricks.com/mcp", - name="my custom mcp server", -) - -agent = Agent( - name="my agent", - model="databricks-claude-3-7-sonnet", - mcp_servers=[custom_mcp], -) -``` - -### 2. Deploy your agent app first - -```bash -databricks bundle deploy -databricks bundle run # from databricks.yml resources.apps.* -``` - -### 3. Get your agent app's service principal - -```bash -databricks apps get --output json | jq -r '.service_principal_name' -``` - -Example output: `sp-abc123-def456` - -### 4. 
Grant permission on the MCP server app - -```bash -databricks apps update-permissions \ - --json '{"access_control_list": [{"service_principal_name": "", "permission_level": "CAN_USE"}]}' -``` - -Example: -```bash -databricks apps update-permissions mcp-my-server \ - --json '{"access_control_list": [{"service_principal_name": "sp-abc123-def456", "permission_level": "CAN_USE"}]}' -``` - -## Notes - -- This manual step is required each time you connect to a new custom MCP server -- The permission grant persists across deployments -- If you redeploy the agent app with a new service principal, you'll need to grant permissions again diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml deleted file mode 100644 index ac5c626a..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/experiment.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# MLflow Experiment -# Use for: Tracing and model logging -# Note: Already configured in template's databricks.yml - -- name: 'my_experiment' - experiment: - experiment_id: '12349876' - permission: 'CAN_MANAGE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml deleted file mode 100644 index 71589d52..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/genie-space.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Genie Space -# Use for: Natural language interface to data -# MCP URL: {host}/api/2.0/mcp/genie/{space_id} - -- name: 'my_genie_space' - genie_space: - name: 'My Genie Space' - space_id: '01234567-89ab-cdef' - permission: 'CAN_RUN' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml deleted file mode 100644 index b49ce9da..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/serving-endpoint.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Model Serving Endpoint -# Use for: Model inference endpoints - -- name: 'my_endpoint' - serving_endpoint: - name: 'my_endpoint' - permission: 'CAN_QUERY' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml deleted file mode 100644 index a6ce9446..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/sql-warehouse.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# SQL Warehouse -# Use for: SQL query execution - -- name: 'my_warehouse' - sql_warehouse: - sql_warehouse_id: 'abc123def456' - permission: 'CAN_USE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml deleted file mode 100644 index 316675fe..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-connection.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Unity Catalog Connection -# Use for: External MCP servers via UC connections -# MCP URL: {host}/api/2.0/mcp/external/{connection_name} - -- name: 'my_connection' - uc_securable: - securable_full_name: 'my-connection-name' - securable_type: 'CONNECTION' - permission: 'USE_CONNECTION' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml deleted file mode 100644 index 43f938a9..00000000 --- 
a/agent-supervisor-api/.claude/skills/add-tools/examples/uc-function.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Unity Catalog Function -# Use for: UC functions accessed via MCP server -# MCP URL: {host}/api/2.0/mcp/functions/{catalog}/{schema}/{function_name} - -- name: 'my_uc_function' - uc_securable: - securable_full_name: 'catalog.schema.function_name' - securable_type: 'FUNCTION' - permission: 'EXECUTE' diff --git a/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml b/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml deleted file mode 100644 index 0ba39027..00000000 --- a/agent-supervisor-api/.claude/skills/add-tools/examples/vector-search.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Vector Search Index -# Use for: RAG applications with unstructured data -# MCP URL: {host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index_name} - -- name: 'my_vector_index' - uc_securable: - securable_full_name: 'catalog.schema.index_name' - securable_type: 'TABLE' - permission: 'SELECT' diff --git a/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md b/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md deleted file mode 100644 index 896a8baa..00000000 --- a/agent-supervisor-api/.claude/skills/agent-memory/SKILL.md +++ /dev/null @@ -1,176 +0,0 @@ ---- -name: agent-openai-memory -description: "Add memory capabilities to your agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage." ---- - -# Stateful Memory with OpenAI Agents SDK Sessions - -This template uses OpenAI Agents SDK [Sessions](https://openai.github.io/openai-agents-python/sessions/) with `AsyncDatabricksSession` to persist conversation history to a Databricks Lakebase instance. - -## How Sessions Work - -Sessions automatically manage conversation history for multi-turn interactions: - -1. **Before each run**: The session retrieves prior conversation history and prepends it to input -2. **During the run**: New items (user messages, responses, tool calls) are generated -3. **After each run**: All new items are automatically stored in the session - -This eliminates the need to manually manage conversation state between runs. 
- -## Key Concepts - -| Concept | Description | -|---------|-------------| -| **Session** | Stores conversation history for a specific `session_id` | -| **`session_id`** | Unique identifier linking requests to the same conversation | -| **`AsyncDatabricksSession`** | Session implementation backed by Databricks Lakebase | -| **`LAKEBASE_INSTANCE_NAME`** | Environment variable specifying the Lakebase instance | - -## How This Template Uses Sessions - -### Session Creation (`agent_server/agent.py`) - -```python -from databricks_openai.agents import AsyncDatabricksSession - -session = AsyncDatabricksSession( - session_id=get_session_id(request), - instance_name=LAKEBASE_INSTANCE_NAME, -) - -result = await Runner.run(agent, messages, session=session) -``` - -### Session ID Extraction (`agent_server/agent.py`) - -The `session_id` is extracted from `custom_inputs` or auto-generated: - -```python -def get_session_id(request: ResponsesAgentRequest) -> str: - if hasattr(request, "custom_inputs") and request.custom_inputs: - if "session_id" in request.custom_inputs: - return request.custom_inputs["session_id"] - return str(uuid7()) -``` - -### Lakebase Instance Resolution (`agent_server/utils.py`) - -The `LAKEBASE_INSTANCE_NAME` env var can be either an instance name or a hostname. The `resolve_lakebase_instance_name()` function handles both cases: - -```python -_LAKEBASE_INSTANCE_NAME_RAW = os.environ.get("LAKEBASE_INSTANCE_NAME") -LAKEBASE_INSTANCE_NAME = resolve_lakebase_instance_name(_LAKEBASE_INSTANCE_NAME_RAW) -``` - ---- - -## Prerequisites - -1. **Dependency**: `databricks-openai[memory]` must be in `pyproject.toml` (already included) - -2. **Lakebase instance**: You need a Databricks Lakebase instance. See the **lakebase-setup** skill for creating and configuring one. - -3. **Environment variable**: Set `LAKEBASE_INSTANCE_NAME` in your `.env` file: - ```bash - LAKEBASE_INSTANCE_NAME= - ``` - ---- - -## Configuration Files - -### databricks.yml (Lakebase Resource) - -Add the Lakebase database resource to your app: - -```yaml -resources: - apps: - agent_openai_agents_sdk_short_term_memory: - name: "your-app-name" - source_code_path: ./ - - resources: - # ... other resources (experiment, etc.) ... - - # Lakebase instance for session storage - - name: 'database' - database: - instance_name: '' - database_name: 'databricks_postgres' - permission: 'CAN_CONNECT_AND_CREATE' -``` - -### databricks.yml config block (Environment Variables) - -The `LAKEBASE_INSTANCE_NAME` env var is resolved from the database resource at deploy time. 
Add to your app's `config.env` in `databricks.yml`: - -```yaml - config: - env: - - name: LAKEBASE_INSTANCE_NAME - value_from: "database" -``` - -### .env (Local Development) - -```bash -LAKEBASE_INSTANCE_NAME= -``` - ---- - -## Testing Sessions - -### Test Multi-Turn Conversation Locally - -```bash -# Start the server -uv run start-app - -# First message - starts a new session -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hello, I live in SF!"}]}' - -# Note the session_id from custom_outputs in the response - -# Second message - continues the same session -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ - "input": [{"role": "user", "content": "What city did I say I live in?"}], - "custom_inputs": {"session_id": ""} - }' -``` - -### Test Streaming - -```bash -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ - "input": [{"role": "user", "content": "Hello!"}], - "stream": true - }' -``` - ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| **"LAKEBASE_INSTANCE_NAME environment variable is required"** | Missing env var | Set `LAKEBASE_INSTANCE_NAME` in `.env` | -| **SSL connection closed unexpectedly** | Network/instance issue | Verify Lakebase instance is running: `databricks lakebase instances get ` | -| **Agent doesn't remember previous messages** | Different session_id | Pass the same `session_id` via `custom_inputs` across requests | -| **"Unable to resolve hostname"** | Hostname doesn't match any instance | Verify the hostname or use the instance name directly | -| **Permission denied** | Missing Lakebase access | Add `database` resource to `databricks.yml` with `CAN_CONNECT_AND_CREATE` | - ---- - -## Next Steps - -- Configure Lakebase: see **lakebase-setup** skill -- Test locally: see **run-locally** skill -- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/deploy/SKILL.md b/agent-supervisor-api/.claude/skills/deploy/SKILL.md deleted file mode 100644 index 6c6f40e0..00000000 --- a/agent-supervisor-api/.claude/skills/deploy/SKILL.md +++ /dev/null @@ -1,232 +0,0 @@ ---- -name: deploy -description: "Deploy agent to Databricks Apps using DAB (Databricks Asset Bundles). Use when: (1) User says 'deploy', 'push to databricks', or 'bundle deploy', (2) 'App already exists' error occurs, (3) Need to bind/unbind existing apps, (4) Debugging deployed apps, (5) Querying deployed app endpoints." ---- - -# Deploy to Databricks Apps - -## Profile Configuration - -**IMPORTANT:** Before running any `databricks` CLI command, read the `.env` file to get the `DATABRICKS_CONFIG_PROFILE` value. All commands must include the profile: - -```bash -databricks --profile -``` - -For example, if `.env` has `DATABRICKS_CONFIG_PROFILE=dev`, run `databricks bundle deploy --profile dev`. Without this, the CLI may target the wrong workspace. - -## App Naming Convention - -Unless the user specifies a different name, apps should use the prefix `agent-*`: -- `agent-data-analyst` -- `agent-customer-support` -- `agent-code-helper` - -Update the app name in `databricks.yml`: -```yaml -resources: - apps: - agent_supervisor_api: - name: "agent-your-app-name" # Use agent-* prefix -``` - -## Deploy Commands - -**IMPORTANT:** Always run BOTH commands to deploy and start your app: - -```bash -# 1. 
Validate bundle configuration (catches errors before deploy) -databricks bundle validate - -# 2. Deploy the bundle (creates/updates resources, uploads files) -databricks bundle deploy - -# 3. Run the app (starts/restarts with uploaded source code) - REQUIRED! -databricks bundle run agent_supervisor_api -``` - -> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code! - -The resource key `agent_supervisor_api` matches the app name in `databricks.yml` under `resources.apps`. - -## Handling "App Already Exists" Error - -If `databricks bundle deploy` fails with: -``` -Error: failed to create app -Failed to create app . An app with the same name already exists. -``` - -**Ask the user:** "Would you like to bind the existing app to this bundle, or delete it and create a new one?" - -### Option 1: Bind Existing App (Recommended) - -**Step 1:** Get the existing app's full configuration: -```bash -# Get app config including budget_policy_id and other server-side settings -databricks apps get --output json | jq '{name, budget_policy_id, description}' -``` - -**Step 2:** Update `databricks.yml` to match the existing app's configuration exactly: -```yaml -resources: - apps: - agent_supervisor_api: - name: "existing-app-name" # Must match exactly - budget_policy_id: "xxx-xxx-xxx" # Copy from step 1 if present -``` - -> **Why this matters:** Existing apps may have server-side configuration (like `budget_policy_id`) that isn't in your bundle. If these don't match, Terraform will fail with "Provider produced inconsistent result after apply". Always sync the app's current config to `databricks.yml` before binding. - -**Step 3:** If deploying to a `mode: production` target, set `workspace.root_path`: -```yaml -targets: - prod: - mode: production - workspace: - root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} -``` - -> **Why this matters:** Production mode requires an explicit root path to ensure only one copy of the bundle is deployed. Without this, the deploy will fail with a recommendation to set `workspace.root_path`. - -**Step 4:** Check if already bound, then bind if needed: -```bash -# Check if resource is already managed by this bundle -databricks bundle summary --output json | jq '.resources.apps' - -# If the app appears in the summary, skip binding and go to Step 5 -# If NOT in summary, bind the resource: -databricks bundle deployment bind agent_supervisor_api --auto-approve -``` - -> **Note:** If bind fails with "Resource already managed by Terraform", the app is already bound to this bundle. Skip to Step 5 and deploy directly. - -**Step 5:** Deploy: -```bash -databricks bundle deploy -databricks bundle run agent_supervisor_api -``` - -### Option 2: Delete and Recreate - -```bash -databricks apps delete -databricks bundle deploy -``` - -**Warning:** This permanently deletes the app's URL, OAuth credentials, and service principal. - -## Unbinding an App - -To remove the link between bundle and deployed app: - -```bash -databricks bundle deployment unbind agent_supervisor_api -``` - -Use when: -- Switching to a different app -- Letting bundle create a new app -- Switching between deployed instances - -Note: Unbinding doesn't delete the deployed app. - -## Query Deployed App - -> **IMPORTANT:** Databricks Apps are **only** queryable via OAuth token. 
You **cannot** use a Personal Access Token (PAT) to query your agent. Attempting to use a PAT will result in a 302 redirect error. - -**Get OAuth token:** -```bash -databricks auth token | jq -r '.access_token' -``` - -**Send request:** -```bash -curl -X POST /invocations \ - -H "Authorization: Bearer " \ - -H "Content-Type: application/json" \ - -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' -``` - -**If using memory** - include `user_id` to scope memories per user: -```bash -curl -X POST /invocations \ - -H "Authorization: Bearer " \ - -H "Content-Type: application/json" \ - -d '{ - "input": [{"role": "user", "content": "What do you remember about me?"}], - "custom_inputs": {"user_id": "user@example.com"} - }' -``` - -## On-Behalf-Of (OBO) User Authentication - -To authenticate as the requesting user instead of the app service principal: - -```python -from agent_server.utils import get_user_workspace_client - -# In your agent code -user_client = get_user_workspace_client() -# Use user_client for operations that should run as the user -``` - -This is useful when you want the agent to access resources with the user's permissions rather than the app's service principal permissions. - -See: [OBO authentication documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#retrieve-user-authorization-credentials) - -## Debug Deployed Apps - -```bash -# View logs (follow mode) -databricks apps logs --follow - -# Check app status -databricks apps get --output json | jq '{app_status, compute_status}' - -# Get app URL -databricks apps get --output json | jq -r '.url' -``` - -## Important Notes - -- **App naming convention**: App names must be prefixed with `agent-` (e.g., `agent-my-assistant`, `agent-data-analyst`) -- **Name is immutable**: Changing the `name` field in `databricks.yml` forces app replacement (destroy + create) -- **Remote Terraform state**: Databricks stores state remotely; same app detected across directories -- **Review the plan**: Look for `# forces replacement` in Terraform output before confirming - -## FAQ - -**Q: I see a 200 OK in the logs, but get an error in the actual stream. What's going on?** - -This is expected behavior. The initial 200 OK confirms stream setup was successful. Errors that occur during streaming don't affect the initial HTTP status code. Check the stream content for the actual error message. - -**Q: When querying my agent, I get a 302 redirect error. What's wrong?** - -You're likely using a Personal Access Token (PAT). Databricks Apps only support OAuth tokens. 
Generate one with: -```bash -databricks auth token -``` - -**Q: How do I add dependencies to my agent?** - -Use `uv add`: -```bash -uv add -# Example: uv add "mlflow-skinny[databricks]" -``` - -## Troubleshooting - -| Issue | Solution | -|-------|----------| -| Validation errors | Run `databricks bundle validate` to see detailed errors before deploying | -| Permission errors at runtime | Grant resources in `databricks.yml` (see **add-tools** skill) | -| Lakebase access errors | See **lakebase-setup** skill for permissions (if using memory) | -| App not starting | Check `databricks apps logs ` | -| Auth token expired | Run `databricks auth token` again | -| 302 redirect error | Use OAuth token, not PAT | -| "Provider produced inconsistent result" | Sync app config to `databricks.yml` | -| "should set workspace.root_path" | Add `root_path` to production target | -| App running old code after deploy | Run `databricks bundle run agent_supervisor_api` after deploy | -| Env var is None in deployed app | Check `value_from` in databricks.yml `config.env` matches resource `name` | diff --git a/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md b/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md deleted file mode 100644 index 87c3f519..00000000 --- a/agent-supervisor-api/.claude/skills/discover-tools/SKILL.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -name: discover-tools -description: "Discover available tools and resources in Databricks workspace. Use when: (1) User asks 'what tools are available', (2) Before writing agent code, (3) Looking for MCP servers, Genie spaces, UC functions, or vector search indexes, (4) User says 'discover', 'find resources', or 'what can I connect to'." ---- - -# Discover Available Tools - -**Run tool discovery BEFORE writing agent code** to understand what resources are available in the workspace. - -## Run Discovery - -```bash -uv run discover-tools -``` - -**Options:** -```bash -# Limit to specific catalog/schema -uv run discover-tools --catalog my_catalog --schema my_schema - -# Output as JSON -uv run discover-tools --format json --output tools.json - -# Save markdown report -uv run discover-tools --output tools.md - -# Use specific Databricks profile -uv run discover-tools --profile DEFAULT -``` - -## What Gets Discovered - -| Resource Type | Description | MCP URL Pattern | -|--------------|-------------|-----------------| -| **UC Functions** | SQL UDFs as agent tools | `{host}/api/2.0/mcp/functions/{catalog}/{schema}` | -| **UC Tables** | Structured data for querying | (via UC functions) | -| **Vector Search Indexes** | RAG applications | `{host}/api/2.0/mcp/vector-search/{catalog}/{schema}` | -| **Genie Spaces** | Natural language data interface | `{host}/api/2.0/mcp/genie/{space_id}` | -| **Custom MCP Servers** | Apps starting with `mcp-*` | `{app_url}/mcp` | -| **External MCP Servers** | Via UC connections | `{host}/api/2.0/mcp/external/{connection_name}` | - -## Next Steps - -After discovering tools: -1. **Add MCP servers to your agent** - See **modify-agent** skill for SDK-specific code examples -2. **Grant permissions** in `databricks.yml` - See **add-tools** skill for YAML snippets -3. 
**Test locally** with `uv run start-app` - See **run-locally** skill diff --git a/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md b/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md deleted file mode 100644 index 2dfbc9c3..00000000 --- a/agent-supervisor-api/.claude/skills/lakebase-setup/SKILL.md +++ /dev/null @@ -1,392 +0,0 @@ ---- -name: lakebase-setup -description: "Configure Lakebase for agent memory storage. Use when: (1) Adding memory capabilities to the agent, (2) 'Failed to connect to Lakebase' errors, (3) Permission errors on checkpoint/store tables, (4) User says 'lakebase', 'memory setup', or 'add memory'." ---- - -# Lakebase Setup for Agent Persistence - -> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks --profile ` or `DATABRICKS_CONFIG_PROFILE= databricks ` - -## Overview - -Lakebase provides persistent PostgreSQL storage for agents: -- **Short-term memory** (LangGraph): Conversation history within a thread (`AsyncCheckpointSaver`) -- **Long-term memory** (LangGraph): User facts across sessions (`AsyncDatabricksStore`) -- **Long-running agent persistence** (OpenAI SDK): Background task state via custom SQLAlchemy tables (`agent_server` schema) - -> **Note:** For pre-configured memory templates, see: -> - `agent-langgraph-short-term-memory` - Conversation history within a session -> - `agent-langgraph-long-term-memory` - User facts that persist across sessions -> - `agent-openai-agents-sdk-long-running-agent` - Background tasks with Lakebase persistence - -## Complete Setup Workflow - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ 1. Add dependency → 2. Get instance → 3. Configure DAB │ -│ 4. Configure .env → 5. Initialize tables → 6. Deploy + Run │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Step 1: Add Memory Dependency - -Add the memory extra to your `pyproject.toml`: - -```toml -dependencies = [ - "databricks-langchain[memory]", - # ... other dependencies -] -``` - -Then sync dependencies: -```bash -uv sync -``` - ---- - -## Step 2: Create or Get Lakebase Instance - -### Option A: Create New Instance (via Databricks UI) - -1. Go to your Databricks workspace -2. Navigate to **Compute** → **Lakebase** -3. Click **Create Instance** -4. Note the instance name - -### Option B: Use Existing Instance - -If you have an existing instance, note its name for the next step. - ---- - -## Step 3: Configure databricks.yml (Lakebase Resource) - -Add the Lakebase `database` resource to your app in `databricks.yml`: - -```yaml -resources: - apps: - agent_langgraph: - name: "your-app-name" - source_code_path: ./ - - resources: - # ... other resources (experiment, UC functions, etc.) ... - - # Lakebase instance for long-term memory - - name: 'database' - database: - instance_name: '' - database_name: 'databricks_postgres' - permission: 'CAN_CONNECT_AND_CREATE' -``` - -**Important:** -- The `instance_name: ''` must match the actual Lakebase instance name -- Using the `database` resource type automatically grants the app's service principal access to Lakebase - -### Add Environment Variables to databricks.yml config block - -Add the Lakebase environment variables to the `config.env` section of your app in `databricks.yml`: - -```yaml - config: - command: ["uv", "run", "start-app"] - env: - # ... other env vars ... 
- - # Lakebase instance name - resolved from database resource at deploy time - - name: LAKEBASE_INSTANCE_NAME - value_from: "database" - - # Static values for embedding configuration - - name: EMBEDDING_ENDPOINT - value: "databricks-gte-large-en" - - name: EMBEDDING_DIMS - value: "1024" -``` - -**Important:** -- The `LAKEBASE_INSTANCE_NAME` uses `value_from: "database"` which resolves from the `database` resource at deploy time -- The `database` resource handles permissions; the `config.env` provides the instance name to your code - ---- - -## Step 4: Configure .env (Local Development) - -For local development, add to `.env`: - -```bash -# Lakebase configuration for long-term memory -LAKEBASE_INSTANCE_NAME= -EMBEDDING_ENDPOINT=databricks-gte-large-en -EMBEDDING_DIMS=1024 -``` - -**Important:** `embedding_dims` must match the embedding endpoint: - -| Endpoint | Dimensions | -|----------|------------| -| `databricks-gte-large-en` | 1024 | -| `databricks-bge-large-en` | 1024 | - -> **Note:** `.env` is only for local development. When deployed, the app gets `LAKEBASE_INSTANCE_NAME` from the `value_from` reference in the `databricks.yml` config block. - ---- - -## Step 5: Initialize Tables - -### Option A: LangGraph Memory Templates (public schema) - -**Before deploying**, initialize the Lakebase tables. The `AsyncDatabricksStore` creates tables on first use, but you need to do this locally first: - -```bash -DATABRICKS_CONFIG_PROFILE= uv run python -c "$(cat <<'EOF' -import asyncio -from databricks_langchain import AsyncDatabricksStore - -async def setup(): - async with AsyncDatabricksStore( - instance_name="", - embedding_endpoint="databricks-gte-large-en", - embedding_dims=1024, - ) as store: - await store.setup() - print("Tables created!") - -asyncio.run(setup()) -EOF -)" -``` - -This creates these tables in the `public` schema: -- `store` - Key-value storage for memories -- `store_vectors` - Vector embeddings for semantic search -- `store_migrations` - Schema migration tracking -- `vector_migrations` - Vector schema migration tracking - -### Option B: Long-Running Agent Templates (agent_server schema) - -The long-running agent uses SQLAlchemy with a custom `agent_server` schema. Tables are created automatically on app startup via `CREATE SCHEMA IF NOT EXISTS agent_server` and `Base.metadata.create_all`. No manual table initialization is needed. - -Tables created in the `agent_server` schema: -- `responses` - Response status tracking for background agent tasks -- `messages` - Stream events and output items for responses - ---- - -## Step 6: Grant SP Permissions (CRITICAL for deployed apps) - -After deploying, the app's service principal needs Postgres roles to access Lakebase tables. The DAB `database` resource with `CAN_CONNECT_AND_CREATE` grants basic connectivity, but you must also grant Postgres-level schema and table permissions. 
- -**Step 1:** Get the app's service principal client ID: -```bash -DATABRICKS_CONFIG_PROFILE= databricks apps get --output json | jq -r '.service_principal_client_id' -``` - -**Step 2:** Grant permissions using `LakebaseClient`: - -```bash -DATABRICKS_CONFIG_PROFILE= uv run python -c " -from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege - -client = LakebaseClient(instance_name='') -sp_id = '' # UUID from step 1 - -# Create role (must do first) -client.create_role(sp_id, 'SERVICE_PRINCIPAL') - -# Grant schema privileges -client.grant_schema( - grantee=sp_id, - schemas=[''], # 'public' for LangGraph, 'agent_server' for long-running agent - privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE], -) - -# Grant table privileges -client.grant_table( - grantee=sp_id, - tables=['.', '.'], - privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE], -) - -print('Done!') -" -``` - -### LangGraph Memory Templates - -Grant on `public` schema: -```python -client.grant_schema(grantee=sp_id, schemas=['public'], privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE]) -client.grant_table(grantee=sp_id, tables=['public.store', 'public.store_vectors'], privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE]) -``` - -### Long-Running Agent Templates - -Grant on `agent_server` schema: -```python -client.grant_schema(grantee=sp_id, schemas=['agent_server'], privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE]) -client.grant_table(grantee=sp_id, tables=['agent_server.responses', 'agent_server.messages'], privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, TablePrivilege.UPDATE, TablePrivilege.DELETE]) -``` - ---- - -## Step 7: Deploy and Run Your App - -**IMPORTANT:** Always run both `deploy` AND `run` commands: - -```bash -# Deploy resources and upload files -DATABRICKS_CONFIG_PROFILE= databricks bundle deploy - -# Start/restart the app with new code (REQUIRED!) -DATABRICKS_CONFIG_PROFILE= databricks bundle run {{BUNDLE_NAME}} -``` - -> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is required to actually start the app with the new code. 
- ---- - -## Complete Example: databricks.yml with Lakebase - -```yaml -bundle: - name: agent_langgraph - -resources: - experiments: - agent_langgraph_experiment: - name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target} - - apps: - agent_langgraph: - name: "my-agent-app" - description: "Agent with long-term memory" - source_code_path: ./ - config: - command: ["uv", "run", "start-app"] - env: - - name: MLFLOW_TRACKING_URI - value: "databricks" - - name: MLFLOW_REGISTRY_URI - value: "databricks-uc" - - name: API_PROXY - value: "http://localhost:8000/invocations" - - name: CHAT_APP_PORT - value: "3000" - - name: CHAT_PROXY_TIMEOUT_SECONDS - value: "300" - # Reference experiment resource - - name: MLFLOW_EXPERIMENT_ID - value_from: "experiment" - # Lakebase instance name (resolved from database resource) - - name: LAKEBASE_INSTANCE_NAME - value_from: "database" - # Embedding configuration - - name: EMBEDDING_ENDPOINT - value: "databricks-gte-large-en" - - name: EMBEDDING_DIMS - value: "1024" - - resources: - - name: 'experiment' - experiment: - experiment_id: "${resources.experiments.agent_langgraph_experiment.id}" - permission: 'CAN_MANAGE' - - # Lakebase instance for long-term memory - - name: 'database' - database: - instance_name: '' - database_name: 'databricks_postgres' - permission: 'CAN_CONNECT_AND_CREATE' - -targets: - dev: - mode: development - default: true -``` - ---- - -## Troubleshooting - -| Issue | Cause | Solution | -|-------|-------|----------| -| **"embedding_dims is required when embedding_endpoint is specified"** | Missing `embedding_dims` parameter | Add `embedding_dims=1024` to AsyncDatabricksStore | -| **"relation 'store' does not exist"** | Tables not initialized | Run `await store.setup()` locally first (Step 5) | -| **"Unable to resolve Lakebase instance 'None'"** | Missing env var in deployed app | Add `LAKEBASE_INSTANCE_NAME` to databricks.yml `config.env` | -| **"Unable to resolve Lakebase instance '...database.cloud.databricks.com'"** | Used value_from instead of value | Use `value: ""` not `value_from` for Lakebase | -| **"permission denied for table store"** | Missing grants | The `database` resource in DAB should handle this; verify the resource is configured | -| **"Failed to connect to Lakebase"** | Wrong instance name | Verify instance name in databricks.yml and .env | -| **Connection pool errors on exit** | Python cleanup race | Ignore `PythonFinalizationError` - it's harmless | -| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run agent_langgraph` after deploy | -| **value_from not resolving** | Resource name mismatch | Ensure `value_from` value matches `name` in databricks.yml resources | - ---- - -## Quick Reference: LakebaseClient API - -For manual permission management (usually not needed with DAB `database` resource): - -```python -from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege - -client = LakebaseClient(instance_name="...") - -# Create role (must do first) -client.create_role(identity_name, "SERVICE_PRINCIPAL") - -# Grant schema (note: schemas is a list, grantee not role) -client.grant_schema( - grantee="...", - schemas=["public"], - privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE], -) - -# Grant tables (note: tables includes schema prefix) -client.grant_table( - grantee="...", - tables=["public.store"], - privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, ...], -) - -# Execute raw SQL -client.execute("SELECT * FROM pg_tables WHERE 
schemaname = 'public'") -``` - -### Service Principal Identifiers - -When granting permissions manually, note that Databricks apps have multiple identifiers: - -| Field | Format | Example | -|-------|--------|---------| -| `service_principal_id` | Numeric ID | `1234567890123456` | -| `service_principal_client_id` | UUID | `a1b2c3d4-e5f6-7890-abcd-ef1234567890` | -| `service_principal_name` | String name | `my-app-service-principal` | - -**Get all identifiers:** -```bash -DATABRICKS_CONFIG_PROFILE= databricks apps get --output json | jq '{ - id: .service_principal_id, - client_id: .service_principal_client_id, - name: .service_principal_name -}' -``` - -**Which to use:** -- `LakebaseClient.create_role()` - Use `service_principal_client_id` (UUID) or `service_principal_name` -- Raw SQL grants - Use `service_principal_client_id` (UUID) - ---- - -## Next Steps - -- Add memory to agent code: see **agent-memory** skill -- Test locally: see **run-locally** skill -- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md b/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md deleted file mode 100644 index 4287b4af..00000000 --- a/agent-supervisor-api/.claude/skills/migrate-from-model-serving/SKILL.md +++ /dev/null @@ -1,965 +0,0 @@ ---- -name: migrate-from-model-serving -description: "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps. Use when: (1) User wants to migrate from Model Serving to Apps, (2) User has a ResponsesAgent with predict()/predict_stream() methods, (3) User wants to convert to @invoke/@stream decorators." ---- - -# Model Serving to Databricks Apps Migration Guide - -This guide instructs LLM coding agents how to migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps. - ---- - -## Overview - -**Goal:** Migrate an agent deployed on Databricks Model Serving (using `ResponsesAgent` with `predict()`/`predict_stream()`) to Databricks Apps (using MLflow GenAI Server with `@invoke`/`@stream` decorators). - -**Key Transformation:** -- Model Serving: Synchronous `predict()` and `predict_stream()` methods on a class -- Apps: Functions with `@invoke` and `@stream` decorators (sync or async, based on user preference) - -**Deliverables:** After migration is complete, you will have: - -``` -/ -├── original_mlflow_model/ # Downloaded artifacts from Model Serving -│ ├── MLmodel -│ ├── code/ -│ │ └── agent.py -│ ├── input_example.json -│ └── requirements.txt -│ -└── / # New Databricks App (ready to deploy) - ├── agent_server/ - │ ├── agent.py # Migrated agent code - │ └── ... - ├── databricks.yml # Bundle config with resources - ├── pyproject.toml - ├── requirements.txt - └── ... -``` - -> **``** is the name the user provides at the start of the migration. It is used as both the directory name and the Databricks App name at deploy time. - ---- - -## Before You Begin: Gather User Inputs - -**Before doing anything else, ask the user three questions.** Use the `AskUserQuestion` tool to collect all answers at once so the user is only prompted once, then Claude can execute the rest of the migration autonomously. - -**Questions to ask:** - -1. **Databricks profile:** Which Databricks CLI profile should be used for the workspace where the Model Serving endpoint lives? (Run `databricks auth profiles` first to list available profiles and their workspaces, then present the options to the user.) -2. **App name:** What should the new Databricks App be named? 
(Must be lowercase, can contain letters, numbers, and hyphens, and must be unique within the workspace.) -3. **Async migration:** Would you like to migrate your agent code to be fully async? - - **Yes (Recommended):** Converts all I/O operations to async (`await`/`async for`), enabling higher concurrency on smaller compute — no more threads sitting idle while waiting for LLM responses or long-running tool calls. - - **No:** Keeps your existing synchronous code with minimal changes — just extracts the logic from the `ResponsesAgent` class and wraps it with `@invoke`/`@stream` decorators. Simpler migration, but each request blocks a thread while waiting for I/O. - -Store the answers as: -- `` — used for ALL `databricks` CLI commands throughout the migration (via `--profile `) -- `` — used as both the directory name for the migrated app AND the app name when deploying with `databricks bundle deploy` -- `` — `yes` or `no`, determines whether to convert the agent code to async or keep it synchronous - -### Validate Authentication - -After receiving the user's answers, validate the selected profile: - -```bash -databricks current-user me --profile -``` - -If this fails with an authentication error, prompt the user to re-authenticate: - -```bash -databricks auth login --profile -``` - -> **Important:** Remember to include `--profile ` on every `databricks` CLI command throughout the migration. - -### Create the App Directory - -Copy all scaffold files from the current working directory into a new directory named `/`. Exclude instruction files (`AGENTS.md`, `CLAUDE.md`), hidden directories (`.claude/`, `.git/`), and any migration artifacts (e.g., `original_mlflow_model/`, `.migration-venv/`). Do NOT search for or copy scaffold files from other directories or templates — everything you need is right here. - -All subsequent migration steps operate inside the `/` directory. - -> **Note:** The `agent_server/agent.py` scaffold is intentionally framework-agnostic — it contains the `@invoke`/`@stream` decorator pattern with TODO placeholders. Step 3 (Migrate the Agent Code) will replace these placeholders with the actual agent logic from the original Model Serving endpoint. - -### Create Task List - -**Create a task list to track progress.** This helps the user follow along and see what's completed, in progress, and pending. - -> **User tip:** Press `Ctrl+T` to toggle the task list view in your terminal. The display shows up to 10 tasks at a time with status indicators. 
- -Create the following tasks using the `TaskCreate` tool: - -| Task | Description | -|------|-------------| -| **Authenticate to Databricks** | Verify Databricks CLI authentication and validate the selected profile | -| **Download original agent artifacts** | Download the MLflow model artifacts from Model Serving endpoint | -| **Analyze and understand agent code** | Examine the original agent code, identify tools, resources, and dependencies | -| **Migrate agent code to Apps format** | Transform ResponsesAgent class to @invoke/@stream decorated functions | -| **Set up and configure the app** | Install dependencies, run quickstart, configure environment | -| **Test agent locally** | Start local server and verify the agent works correctly | -| **Deploy to Databricks Apps** | Configure databricks.yml resources and deploy with Databricks Asset Bundles | -| **Test deployed app** | Verify the deployed app responds correctly | - -Update task status as you progress: -- Mark tasks as `in_progress` when starting each step -- Mark tasks as `completed` when finished -- This gives the user visibility into migration progress - ---- - -## Step 1: Download the Original Agent Code - -> **Task:** Mark "Authenticate to Databricks" as `completed`. Mark "Download original agent artifacts" as `in_progress`. -> -> **Note:** The `` and `` values were collected from the user in the "Before You Begin" section. Use them throughout. - -Download the original agent code from the Model Serving endpoint. This requires setting up a virtual environment with MLflow to access the model artifacts. - -### 1.1 Get Model Info from Endpoint - -If you have a serving endpoint name, extract the model details: - -```bash -# Get endpoint info (remember to include --profile if using non-default) -databricks serving-endpoints get --profile --output json -``` - -Look for `served_entities[0].entity_name` (model name) and `entity_version` in the response. Find the entity with 100% traffic in `traffic_config.routes`. - -### 1.2 Download Model Artifacts - -Use `uv run --with` to download artifacts without creating a separate virtual environment. The `mlflow[databricks]` extra includes `boto3` for Unity Catalog artifact access: - -```bash -DATABRICKS_CONFIG_PROFILE= uv run --no-project \ - --with "mlflow[databricks]>=2.15.0" \ - --with "databricks-sdk>=0.30.0" \ - python3 << 'EOF' -import mlflow - -mlflow.set_tracking_uri("databricks") - -# Replace with actual values from step 1.1 -MODEL_NAME = "" -VERSION = "" - -print(f"Downloading model: models:/{MODEL_NAME}/{VERSION}") -mlflow.artifacts.download_artifacts( - artifact_uri=f"models:/{MODEL_NAME}/{VERSION}", - dst_path="./original_mlflow_model" -) -print("Download complete! 
Artifacts saved to ./original_mlflow_model") -EOF -``` - -### 1.3 Verify Downloaded Artifacts - -Check that the key files exist and understand the full structure: - -```bash -# List all downloaded files recursively -find ./original_mlflow_model -type f | head -50 - -# Check for MLmodel file (contains resource requirements) -cat ./original_mlflow_model/MLmodel - -# Check for input example (useful for testing) -cat ./original_mlflow_model/input_example.json 2>/dev/null -``` - -**Examine the `/code` folder** - contains all code dependencies logged via `code_paths=["..."]`: - -```bash -# List all code files -ls -la ./original_mlflow_model/code/ - -# The main agent is typically agent.py, but there may be additional modules -find ./original_mlflow_model/code -name "*.py" -type f -``` - -**Examine the `/artifacts` folder** (if present) - contains artifacts logged via `artifacts={...}`: - -```bash -# Check for artifacts folder -ls -la ./original_mlflow_model/artifacts/ 2>/dev/null - -# List all artifacts -find ./original_mlflow_model/artifacts -type f 2>/dev/null -``` - -> **Important:** Take note of ALL files in `/code` and `/artifacts`. You will need to copy these to the migrated app and ensure imports still work correctly. - -### Expected Output Structure - -After successful download, you should have: - -``` -./original_mlflow_model/ -├── MLmodel # Model metadata and resource requirements -├── code/ # Code logged via code_paths=["..."] -│ ├── agent.py # Main agent implementation -│ ├── utils.py # (optional) Helper modules -│ ├── tools.py # (optional) Custom tool definitions -│ └── ... # Any other code dependencies -├── artifacts/ # (optional) Artifacts logged via artifacts={...} -│ ├── config.yaml # (optional) Configuration files -│ ├── prompts/ # (optional) Prompt templates -│ └── ... # Any other artifacts (data files, etc.) -├── input_example.json # Sample request for testing -├── requirements.txt # Original dependencies -└── ... -``` - -### Key Files to Examine - -1. **`code/agent.py`** - Contains the `ResponsesAgent` class with `predict()` and `predict_stream()` methods -2. **`code/*.py`** - Any additional Python modules the agent imports -3. **`MLmodel`** - Contains the `resources` section listing required Databricks resources -4. **`artifacts/`** - Any configuration files, prompts, or data files the agent uses -5. **`input_example.json`** - Use this to test the migrated agent - -### Troubleshooting Model Download - -**"Unable to import necessary dependencies to access model version files in Unity Catalog"** -This means `boto3` is missing. Ensure you're using `mlflow[databricks]` (not just `mlflow`) in the `--with` flag — the `[databricks]` extra includes `boto3`. - -**"INVALID_PARAMETER_VALUE" or authentication errors** -Re-authenticate with Databricks (include profile if non-default): -```bash -databricks auth login --profile -``` - -**Wrong workspace / Model not found** -Make sure you're using the correct profile that corresponds to the workspace where the model is deployed: -```bash -# List profiles to see which workspace each points to -databricks auth profiles - -# Verify you can access the workspace -databricks current-user me --profile - -# List models in that workspace -databricks registered-models list --profile -databricks model-versions list --name "" --profile -``` - ---- - -## Step 2: Understand the Key Transformations - -> **Task:** Mark "Download original agent artifacts" as `completed`. Mark "Analyze and understand agent code" as `in_progress`. 
- -### Entry Point Transformation - -In both cases, the `ResponsesAgent` class is replaced with decorated functions. The difference is whether those functions are async or sync. - -**Model Serving (OLD):** -```python -from mlflow.pyfunc import ResponsesAgent, ResponsesAgentRequest, ResponsesAgentResponse - -class MyAgent(ResponsesAgent): - def predict(self, request: ResponsesAgentRequest, params=None) -> ResponsesAgentResponse: - # Synchronous implementation - ... - return ResponsesAgentResponse(output=outputs) - - def predict_stream(self, request: ResponsesAgentRequest, params=None): - # Synchronous generator - for chunk in ...: - yield ResponsesAgentStreamEvent(...) -``` - -**Apps — Async (if `` = yes):** -```python -from mlflow.genai.agent_server import invoke, stream -from mlflow.types.responses import ( - ResponsesAgentRequest, - ResponsesAgentResponse, - ResponsesAgentStreamEvent, -) - -@invoke() -async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - # Async implementation - typically calls streaming() and collects results - outputs = [ - event.item - async for event in streaming(request) - if event.type == "response.output_item.done" - ] - return ResponsesAgentResponse(output=outputs) - -@stream() -async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: - # Async generator - async for event in ...: - yield event -``` - -**Apps — Sync (if `` = no):** -```python -from mlflow.genai.agent_server import invoke, stream -from mlflow.types.responses import ( - ResponsesAgentRequest, - ResponsesAgentResponse, - ResponsesAgentStreamEvent, -) - -@invoke() -def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - # Same sync logic from original predict(), extracted from the class - ... - return ResponsesAgentResponse(output=outputs) - -@stream() -def streaming(request: ResponsesAgentRequest): - # Same sync generator from original predict_stream(), extracted from the class - for chunk in ...: - yield ResponsesAgentStreamEvent(...) -``` - -### Key Differences - -| Aspect | Model Serving | Apps (async) | Apps (sync) | -|--------|--------------|------|------| -| Structure | `class MyAgent(ResponsesAgent)` | Decorated functions | Decorated functions | -| Functions | `def predict()` / `def predict_stream()` | `async def` with `await` | `def` (same as original) | -| Streaming | Sync generator (`yield`) | Async generator (`async for` / `yield`) | Sync generator (`yield`) | -| Server | MLflow Model Server | MLflow GenAI Server (FastAPI) | MLflow GenAI Server (FastAPI) | -| Deployment | `databricks_agents.deploy()` | `databricks bundle deploy` + `bundle run` | `databricks bundle deploy` + `bundle run` | - -### Async Patterns (only if `` = yes) - -> **Skip this section if the user chose synchronous migration.** The sync path keeps all original I/O calls as-is. - -All I/O operations must be converted to async: - -```python -# OLD (sync) -response = client.chat(messages) - -# NEW (async) -response = await client.achat(messages) - -# OLD (sync iteration) -for chunk in stream: - yield chunk - -# NEW (async iteration) -async for chunk in stream: - yield chunk -``` - ---- - -## Step 3: Migrate the Agent Code - -> **Task:** Mark "Analyze and understand agent code" as `completed`. Mark "Migrate agent code to Apps format" as `in_progress`. - -### 3.1 Copy Code Dependencies and Artifacts - -The original MLflow model may contain multiple code files and artifacts that need to be migrated. 
- -**Copy all code files from `/code` to `agent_server/`:** - -```bash -# Copy all Python files from original code folder -cp ./original_mlflow_model/code/*.py .//agent_server/ - -# If there are subdirectories with code, copy those too -# cp -r ./original_mlflow_model/code/submodule .//agent_server/ -``` - -**Copy artifacts (if present):** - -```bash -# Create an artifacts directory in the migrated app if needed -mkdir -p .//agent_server/artifacts - -# Copy all artifacts -cp -r ./original_mlflow_model/artifacts/* .//agent_server/artifacts/ 2>/dev/null || true -``` - -**Fix import paths after copying:** - -When code files are moved, imports may break. Check and update imports in all copied files: - -```python -# BEFORE (if files were in different locations): -from code.utils import helper_function -from artifacts.prompts import SYSTEM_PROMPT - -# AFTER (files are now in agent_server/): -from agent_server.utils import helper_function -# Or if in same directory: -from .utils import helper_function - -# For artifacts, update file paths: -# BEFORE: -with open("artifacts/config.yaml") as f: -# AFTER: -import os -config_path = os.path.join(os.path.dirname(__file__), "artifacts", "config.yaml") -with open(config_path) as f: -``` - -> **Important:** Review each copied file and ensure all imports resolve correctly. The most common issues are: -> - Relative imports that assumed a different directory structure -> - Hardcoded file paths to artifacts -> - Missing `__init__.py` files for package imports - -### 3.2 Extract Configuration - -From the original agent code, identify and preserve: -- **LLM endpoint name** (e.g., `databricks-claude-sonnet-4-5`) -- **System prompt** -- **Tool definitions** -- **Any custom logic** - -### 3.3 Update the Agent Entry Point - -The approach depends on whether the user chose async or sync migration. - ---- - -#### Path A: Synchronous Migration (`` = no) - -This is the minimal-changes path. Extract the logic from the `ResponsesAgent` class, wrap it with `@invoke`/`@stream` decorators, and keep all code synchronous. - -Edit `/agent_server/agent.py`: - -1. **Replace the scaffold with the original agent logic.** The core transformation is extracting the class methods into decorated functions: - -```python -from mlflow.genai.agent_server import invoke, stream -from mlflow.types.responses import ( - ResponsesAgentRequest, - ResponsesAgentResponse, - ResponsesAgentStreamEvent, -) - -# Move any class __init__ or class-level setup to module level -# e.g., client initialization, tool setup, etc. - -@invoke() -def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - # Paste the body of the original predict() method here - # Remove 'self.' references — replace with module-level variables - # Remove 'params' parameter (not used in Apps) - ... - return ResponsesAgentResponse(output=outputs) - -@stream() -def streaming(request: ResponsesAgentRequest): - # Paste the body of the original predict_stream() method here - # Remove 'self.' references — replace with module-level variables - # Remove 'params' parameter (not used in Apps) - for chunk in ...: - yield ResponsesAgentStreamEvent(...) -``` - -2. 
**Key changes from class to functions:** - - Remove the `class MyAgent(ResponsesAgent):` wrapper - - Remove `self` parameter from all methods - - Move `__init__` logic (client creation, tool setup) to module-level code - - Replace `self.some_attribute` with module-level variables - - Add `@invoke()` decorator to the non-streaming function - - Add `@stream()` decorator to the streaming function - -3. **Keep all other code as-is** — no need to convert sync calls to async, no need to change `for` to `async for`, no need to add `await`. - ---- - -#### Path B: Async Migration (`` = yes) - -This path converts all I/O operations to async for higher concurrency. More changes are required, but the result is a more efficient server. - -Edit `/agent_server/agent.py`: - -1. **Update the LLM endpoint:** - ```python - LLM_ENDPOINT_NAME = "" - ``` - -2. **Update the system prompt:** - ```python - SYSTEM_PROMPT = """""" - ``` - -3. **Add your custom tools:** - If your original agent had custom tools, add them: - ```python - from langchain_core.tools import tool - - @tool - async def my_custom_tool(arg: str) -> str: - """Tool description.""" - # Your tool logic (make async if needed) - return result - ``` - -4. **Convert all I/O to async:** - - `def predict()` → `async def non_streaming()` - - `def predict_stream()` → `async def streaming()` - - `client.chat()` → `await client.achat()` - - `for chunk in stream:` → `async for chunk in stream:` - - Sync HTTP calls → `await` async equivalents - -5. **Preserve any special logic:** - Migrate any custom preprocessing, postprocessing, or business logic from the original agent. - ---- - -### 3.4 Handle Stateful Agents - -**If original uses checkpointer (short-term memory):** -- Add checkpointer with Lakebase integration (use `AsyncCheckpointSaver` if async, or sync equivalent if sync) -- Configure `LAKEBASE_INSTANCE_NAME` in `.env` -- Extract thread_id from `request.custom_inputs` or `request.context.conversation_id` - -**If original uses store (long-term memory):** -- Add store with Lakebase integration (use `AsyncDatabricksStore` if async, or sync equivalent if sync) -- Configure `LAKEBASE_INSTANCE_NAME` in `.env` -- Extract user_id from `request.custom_inputs` or `request.context.user_id` - ---- - -## Step 4: Set Up the App - -> **Task:** Mark "Migrate agent code to Apps format" as `completed`. Mark "Set up and configure the app" as `in_progress`. - -### 4.1 Verify Build Configuration - -Before installing dependencies, ensure a README file exists (hatchling requires this): - -**Ensure a README file exists:** - -```bash -# Create a minimal README if one doesn't exist -if [ ! -f "README.md" ]; then - echo "# Migrated Agent App" > README.md -fi -``` - -### 4.2 Install Dependencies - -```bash -cd -uv sync -``` - -### 4.3 Create requirements.txt for Databricks Apps - -Databricks Apps requires a `requirements.txt` file with `uv` to install dependencies from `pyproject.toml`: - -```bash -echo "uv" > requirements.txt -``` - -### 4.4 Run Quickstart - -Run the `uv run quickstart` script to quickly set up your local environment. This is the **recommended** way to configure the app as it handles all necessary setup automatically. - -```bash -uv run quickstart -``` - -This script will: - -1. Verify uv, nvm, and Databricks CLI installations -2. Configure Databricks authentication -3. Configure agent tracing, by creating and linking an MLflow experiment to your app -4. 
Configure `.env` with the necessary environment variables - -> **Important:** The quickstart script creates the MLflow experiment that the app needs for logging traces and models. This experiment will be added as a resource when deploying the app. - -If there are issues with the quickstart script, refer to the manual setup in section 4.5. - -### 4.5 Manual Environment Configuration (Optional) - -If you need to manually configure the environment or add additional variables, edit `.env`: - -```bash -# Databricks authentication -DATABRICKS_CONFIG_PROFILE= - -# MLflow experiment (created by quickstart, or create manually) -MLFLOW_EXPERIMENT_ID= - -# Example: Lakebase for stateful agents -LAKEBASE_INSTANCE_NAME= - -# Example: Custom API keys -MY_API_KEY= -``` - -To manually create an MLflow experiment: - -```bash -databricks experiments create-experiment "/Users//" --profile -``` - ---- - -## Step 5: Test Locally - -> **Task:** Mark "Set up and configure the app" as `completed`. Mark "Test agent locally" as `in_progress`. - -> Test your migrated agent locally before deploying to Databricks Apps. This helps catch configuration issues early and ensures the agent works correctly. - -### 5.1 Start the Server - -After the quickstart setup is complete, start the agent server and chat app locally: - -```bash -cd -uv run start-app -``` - -Wait for the server to start. You should see output indicating the server is running on `http://localhost:8000`. - -> **Note:** If you only need the API endpoint (without the chat UI), you can run `uv run start-server` instead. - -### 5.2 Test with Original Input Example - -The original model artifacts include an `input_example.json` file that contains a sample request. Use this to verify your migrated agent produces the same behavior. If there's no valid sample request then figure out a valid sample request to query agent based on its code. 
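The shell commands below replay the example with curl; an equivalent Python sketch (assuming the `requests` package is installed and the server from 5.1 is running on port 8000) looks like this:

```python
# Sketch: replay the original input example against the local server.
import json
from pathlib import Path

import requests

example_path = Path("../original_mlflow_model/input_example.json")
if example_path.exists():
    payload = json.loads(example_path.read_text())
else:
    # Fallback request if the original model shipped without an input example.
    payload = {"input": [{"role": "user", "content": "Hello!"}]}

resp = requests.post("http://localhost:8000/invocations", json=payload, timeout=120)
resp.raise_for_status()
print(json.dumps(resp.json(), indent=2))
```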
- -```bash -# Check the original input example (from the directory) -cat ../original_mlflow_model/input_example.json -``` - -Example content: -```json -{"input": [{"role": "user", "content": "What is an LLM agent?"}], "custom_inputs": {"thread_id": "example-thread-123"}} -``` - -Test your local server with this input: - -```bash -# Test with the original input example -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d "$(cat ../original_mlflow_model/input_example.json)" -``` - -### 5.3 Test Basic Requests - -```bash -# Non-streaming -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hello!"}]}' - -# Streaming -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hello!"}], "stream": true}' -``` - -### 5.4 Test with Custom Inputs (for stateful agents) - -```bash -# With thread_id for short-term memory -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"thread_id": "test-123"}}' - -# With user_id for long-term memory -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"user_id": "user@example.com"}}' -``` - -### 5.5 Verify Before Proceeding - -Before proceeding to deployment, ensure: -- [ ] The server starts without errors -- [ ] The original input example returns a valid response -- [ ] Streaming responses work correctly -- [ ] Custom inputs (thread_id, user_id) are handled properly (if applicable) - -> **Note:** Only proceed to Step 6 (Deploy) after confirming the agent works correctly locally. - ---- - -## Step 6: Deploy to Databricks Apps - -> **Task:** Mark "Test agent locally" as `completed`. Mark "Deploy to Databricks Apps" as `in_progress`. - -This step uses Databricks Asset Bundles (DAB) to deploy. The scaffold includes a `databricks.yml` that you need to update with the app name and resources from the original model. - -### 6.1 Extract Resources from Original Model - -The original model's `MLmodel` file contains a `resources` section that lists all Databricks resources the agent needs access to. Check `../original_mlflow_model/MLmodel` (or `./original_mlflow_model/MLmodel` if you're in the parent directory) for content like: - -```yaml -resources: - api_version: '1' - databricks: - lakebase: - - name: lakebase - serving_endpoint: - - name: databricks-claude-sonnet-4-5 -``` - -### 6.2 Update `databricks.yml` with Resources - -The scaffold includes a `databricks.yml` with the experiment resource pre-configured. You need to: - -1. **Update the app name** to `` (the name provided by the user) in both the `resources.apps.agent_migration.name` field and the `targets.prod.resources.apps.agent_migration.name` field. -2. **Add resources** extracted from the original MLmodel file to the `resources.apps.agent_migration.resources` list. 
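Rather than reading the YAML by hand, you can dump the declared resources with a short sketch (assuming the Step 1 artifacts are still in `./original_mlflow_model`; PyYAML is available wherever `mlflow` is installed), then translate each entry using the mapping table below:

```python
# Sketch: list the resources declared by the original model so each one can be
# mapped to a databricks.yml entry using the table below.
import yaml

with open("./original_mlflow_model/MLmodel") as f:
    mlmodel = yaml.safe_load(f)

databricks_resources = (mlmodel.get("resources") or {}).get("databricks", {})
for resource_type, entries in databricks_resources.items():
    for entry in entries or []:
        print(f"{resource_type}: {entry}")
```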
- -**Resource Type Mapping (MLmodel → `databricks.yml`):** - -| MLmodel Resource | `databricks.yml` Resource | Key Fields | -|------------------|--------------------------|------------| -| `serving_endpoint` | `serving_endpoint` | `name`, `permission` (CAN_QUERY) | -| `lakebase` | `database` | `database_name: databricks_postgres`, `instance_name`, `permission` (CAN_CONNECT_AND_CREATE) | -| `vector_search_index` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` | -| `function` | `uc_securable` | `securable_full_name`, `securable_type: FUNCTION`, `permission: EXECUTE` | -| `table` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` | -| `uc_connection` | `uc_securable` | `securable_full_name`, `securable_type: CONNECTION`, `permission: USE_CONNECTION` | -| `sql_warehouse` | `sql_warehouse` | `id`, `permission` (CAN_USE) | -| `genie_space` | `genie_space` | `space_id`, `permission` (CAN_RUN) | - -> **Note:** The `experiment` resource is already configured in the scaffold `databricks.yml` and is automatically created by the bundle. You do not need to add it manually. - -**Example: `databricks.yml` for an agent with a serving endpoint and UC function:** - -```yaml -resources: - experiments: - agent_migration_experiment: - name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target} - - apps: - agent_migration: - name: "" # Update to user's app name - description: "Migrated agent from Model Serving to Databricks Apps" - source_code_path: ./ - resources: - - name: 'experiment' - experiment: - experiment_id: "${resources.experiments.agent_migration_experiment.id}" - permission: 'CAN_MANAGE' - - name: 'serving-endpoint' - serving_endpoint: - name: 'databricks-claude-sonnet-4-5' - permission: 'CAN_QUERY' - - name: 'python-exec' - uc_securable: - securable_full_name: 'system.ai.python_exec' - securable_type: 'FUNCTION' - permission: 'EXECUTE' - -targets: - prod: - resources: - apps: - agent_migration: - name: "" # Same name for production -``` - -**Example: Adding Lakebase resources (for stateful agents):** - -```yaml - - name: 'database' - database: - database_name: 'databricks_postgres' - instance_name: 'lakebase' - permission: 'CAN_CONNECT_AND_CREATE' -``` - -### 6.3 Deploy with Databricks Asset Bundles - -From inside the `` directory, validate, deploy, and run: - -```bash -# 1. Validate bundle configuration (catches errors before deploy) -databricks bundle validate --profile - -# 2. Deploy the bundle (creates/updates resources, uploads files) -databricks bundle deploy --profile - -# 3. Run the app (starts/restarts with uploaded source code) - REQUIRED! -databricks bundle run agent_migration --profile -``` - -> **Important:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code! - -### 6.4 Test Deployed App - -> **Task:** Mark "Deploy to Databricks Apps" as `completed`. Mark "Test deployed app" as `in_progress`. 
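The curl commands below fetch the app URL and an OAuth token via the CLI. The same check can be scripted in Python with the Databricks SDK — a sketch, assuming `databricks-sdk` and `requests` are installed; `APP_NAME` is a placeholder for the app name you set in `databricks.yml`:

```python
# Sketch: query the deployed app from Python instead of curl.
import json

import requests
from databricks.sdk import WorkspaceClient

APP_NAME = "my-migrated-agent"  # placeholder — use your app name from databricks.yml

w = WorkspaceClient()  # honors DATABRICKS_CONFIG_PROFILE from .env
app = w.apps.get(name=APP_NAME)

resp = requests.post(
    f"{app.url}/invocations",
    headers=w.config.authenticate(),  # injects the OAuth Authorization header
    json={"input": [{"role": "user", "content": "Hello!"}]},
    timeout=120,
)
resp.raise_for_status()
print(json.dumps(resp.json(), indent=2))
```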
- -```bash -# Get the app URL -APP_URL=$(databricks apps get --profile --output json | jq -r '.url') - -# Get OAuth token -TOKEN=$(databricks auth token --profile | jq -r .access_token) - -# Query the app -curl -X POST ${APP_URL}/invocations \ - -H "Authorization: Bearer $TOKEN" \ - -H "Content-Type: application/json" \ - -d '{"input": [{"role": "user", "content": "Hello!"}]}' -``` - -Once the deployed app responds successfully: - -> **Task:** Mark "Test deployed app" as `completed`. Migration complete! - -### 6.5 Deployment Troubleshooting - -If you encounter issues during deployment, refer to the **deploy** skill for detailed guidance. - -**Debug commands:** -```bash -# Validate bundle configuration -databricks bundle validate --profile - -# View app logs -databricks apps logs --profile --follow - -# Check app status -databricks apps get --profile --output json | jq '{app_status, compute_status}' - -# Get app URL -databricks apps get --profile --output json | jq -r '.url' -``` - -**"App already exists" error:** -If `databricks bundle deploy` fails because the app already exists, refer to the **deploy** skill for instructions on binding an existing app to the bundle. - ---- - -## Reference: App File Structure - -``` -/ -├── agent_server/ -│ ├── __init__.py -│ ├── agent.py # Main agent logic - THIS IS WHERE YOU MIGRATE TO -│ ├── start_server.py # FastAPI server setup -│ ├── utils.py # Helper utilities -│ └── evaluate_agent.py # Agent evaluation -├── scripts/ -│ ├── __init__.py -│ ├── quickstart.py # Setup script -│ └── start_app.py # App startup -├── databricks.yml # Databricks Asset Bundle configuration (resources, config, targets) -├── pyproject.toml # Dependencies (for local dev with uv) -├── requirements.txt # REQUIRED: Must contain "uv" for Databricks Apps -├── .env.example # Environment template -└── README.md -``` - -> **IMPORTANT:** The `requirements.txt` file must exist and contain `uv` so that Databricks Apps can install dependencies using the `pyproject.toml`. Without this file, the app will fail to start. - ---- - -## Reference: Common Migration Patterns - -### Pattern 1: Simple Chat Agent - -**Original:** -```python -class ChatAgent(ResponsesAgent): - def predict(self, request, params=None): - messages = to_chat_completions_input(request.input) - response = self.llm.invoke(messages) - return ResponsesAgentResponse(output=[...]) -``` - -**Migrated (sync):** -```python -llm = ... # Move class-level init to module level - -@invoke() -def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - messages = to_chat_completions_input(request.input) - response = llm.invoke(messages) - return ResponsesAgentResponse(output=[...]) - -@stream() -def streaming(request: ResponsesAgentRequest): - # Original predict_stream() body, with self. removed - ... 
-``` - -**Migrated (async):** -```python -@invoke() -async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - outputs = [e.item async for e in streaming(request) if e.type == "response.output_item.done"] - return ResponsesAgentResponse(output=outputs) - -@stream() -async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: - messages = {"messages": to_chat_completions_input([i.model_dump() for i in request.input])} - agent = await init_agent() - async for event in process_agent_astream_events(agent.astream(messages, stream_mode=["updates", "messages"])): - yield event -``` - -### Pattern 2: Agent with Custom Tools - -**Sync:** Keep tools as-is from the original code. - -**Async:** Migrate tools to async LangChain tools: - -```python -from langchain_core.tools import tool - -@tool -async def search_docs(query: str) -> str: - """Search the documentation.""" - results = await vector_store.asimilarity_search(query) - return format_results(results) -``` - -### Pattern 3: Using LangGraph with create_agent (async only) - -```python -from langchain.agents import create_agent -from databricks_langchain import ChatDatabricks - -async def init_agent(): - tools = await mcp_client.get_tools() # MCP tools are async - model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME) - return create_agent(model=model, tools=tools, system_prompt=SYSTEM_PROMPT) -``` - ---- - -## Reference: Useful Resources - -- **Responses API Docs:** https://mlflow.org/docs/latest/genai/serving/responses-agent/ -- **Agent Framework:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/ -- **Agent Tools:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool -- **databricks-langchain SDK:** https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain - ---- - -## Troubleshooting - -### "Module not found" errors -```bash -uv sync # Reinstall dependencies -``` - -### Authentication errors -```bash -databricks auth login # Re-authenticate -``` - -### Lakebase permission errors -- Ensure the Lakebase instance is added as an app resource in Databricks UI -- Grant appropriate permissions on the Lakebase instance - -### Async errors (async migration only) -- Ensure all I/O calls use async versions (e.g., `await client.achat()` not `client.chat()`) -- Use `async for` instead of `for` when iterating async generators -- If you chose sync migration, these errors should not occur — double-check that you're not mixing sync and async patterns diff --git a/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md b/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md deleted file mode 100644 index 0ac0d7e3..00000000 --- a/agent-supervisor-api/.claude/skills/modify-agent/SKILL.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -name: modify-agent -description: "Modify agent code, add tools, or change configuration. Use when: (1) User says 'modify agent', 'add tool', 'change model', or 'edit agent.py', (2) Adding MCP servers to agent, (3) Changing agent instructions, (4) Understanding SDK patterns." 
---- - -# Modify the Agent - -## Main File - -**`agent_server/agent.py`** - Agent logic, model selection, instructions, MCP servers - -## Key Files - -| File | Purpose | -| -------------------------------- | --------------------------------------------- | -| `agent_server/agent.py` | Agent logic, model, instructions, MCP servers | -| `agent_server/start_server.py` | FastAPI server + MLflow setup | -| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers | -| `agent_server/utils.py` | Databricks auth helpers, stream processing | -| `databricks.yml` | Bundle config & resource permissions | - -## SDK Setup - -```python -import mlflow -from databricks_openai import AsyncDatabricksOpenAI -from agents import set_default_openai_api, set_default_openai_client, Agent -from agents.tracing import set_trace_processors - -# Set up async client (recommended for agent servers) -set_default_openai_client(AsyncDatabricksOpenAI()) -set_default_openai_api("chat_completions") - -# Use MLflow for tracing (disables SDK's built-in tracing) -set_trace_processors([]) -mlflow.openai.autolog() -``` - -## Adding MCP Servers - -```python -from databricks_openai.agents import McpServer - -# UC Functions -uc_server = McpServer( - url=f"{host}/api/2.0/mcp/functions/{catalog}/{schema}", - name="uc functions", -) - -# Genie Space -genie_server = McpServer( - url=f"{host}/api/2.0/mcp/genie/{space_id}", - name="genie space", -) - -# Vector Search -vector_server = McpServer( - url=f"{host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index}", - name="vector search", -) - -# Add to agent -agent = Agent( - name="my agent", - instructions="You are a helpful agent.", - model="databricks-claude-3-7-sonnet", - mcp_servers=[uc_server, genie_server, vector_server], -) -``` - -**After adding MCP servers:** Grant permissions in `databricks.yml` (see **add-tools** skill) - -## Changing the Model - -Available models (check workspace for current list): - -- `databricks-claude-3-7-sonnet` -- `databricks-claude-3-5-sonnet` -- `databricks-meta-llama-3-3-70b-instruct` - -```python -agent = Agent( - name="my agent", - model="databricks-claude-3-7-sonnet", # Change here - ... -) -``` - -**Note:** Some workspaces require granting the app access to the serving endpoint in `databricks.yml`. See the **add-tools** skill and `examples/serving-endpoint.yaml`. - -## Changing Instructions - -```python -agent = Agent( - name="my agent", - instructions="""You are a helpful data analyst assistant. - - You have access to: - - Company sales data via Genie - - Product documentation via vector search - - Always cite your sources when answering questions.""", - ... -) -``` - -## Running the Agent - -```python -from agents import Runner - -# Non-streaming -messages = [{"role": "user", "content": "hi"}] -result = await Runner.run(agent, messages) - -# Streaming -result = Runner.run_streamed(agent, input=messages) -async for event in result.stream_events(): - # Process stream events - pass -``` - -**Converting to Responses API format:** Use `process_agent_stream_events()` from `agent_server/utils.py` to convert streaming output to Responses API compatible format: - -```python -from agent_server.utils import process_agent_stream_events - -result = Runner.run_streamed(agent, input=messages) -async for event in process_agent_stream_events(result.stream_events()): - yield event # Yields ResponsesAgentStreamEvent objects -``` - -## External Resources - -1. 
[databricks-openai SDK](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/openai) -2. [Agent examples](https://github.com/databricks/app-templates) -3. [Agent Framework docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/) -4. [Adding tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool) -5. [OpenAI Agents SDK](https://platform.openai.com/docs/guides/agents-sdk) -6. [Responses API](https://mlflow.org/docs/latest/genai/serving/responses-agent/) - -## Next Steps - -- Discover available tools: see **discover-tools** skill -- Grant resource permissions: see **add-tools** skill -- Test locally: see **run-locally** skill -- Deploy: see **deploy** skill diff --git a/agent-supervisor-api/.claude/skills/quickstart/SKILL.md b/agent-supervisor-api/.claude/skills/quickstart/SKILL.md deleted file mode 100644 index e550162c..00000000 --- a/agent-supervisor-api/.claude/skills/quickstart/SKILL.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -name: quickstart -description: "Set up Databricks agent development environment. Use when: (1) First time setup, (2) Configuring Databricks authentication, (3) User says 'quickstart', 'set up', 'authenticate', or 'configure databricks', (4) No .env file exists." ---- - -# Quickstart & Authentication - -## Prerequisites - -- **uv** (Python package manager) -- **nvm** with Node 20 (for frontend) -- **Databricks CLI v0.283.0+** - -Check CLI version: -```bash -databricks -v # Must be v0.283.0 or above -brew upgrade databricks # If version is too old -``` - -## Run Quickstart - -```bash -uv run quickstart -``` - -**Options:** -- `--profile NAME`: Use specified profile (non-interactive) -- `--host URL`: Workspace URL for initial setup -- `-h, --help`: Show help - -**Examples:** -```bash -# Interactive (prompts for profile selection) -uv run quickstart - -# Non-interactive with existing profile -uv run quickstart --profile DEFAULT - -# New workspace setup -uv run quickstart --host https://your-workspace.cloud.databricks.com -``` - -## What Quickstart Configures - -Creates/updates `.env` with: -- `DATABRICKS_CONFIG_PROFILE` - Selected CLI profile -- `MLFLOW_TRACKING_URI` - Set to `databricks://` for local auth -- `MLFLOW_EXPERIMENT_ID` - Auto-created experiment ID - -## Manual Authentication (Fallback) - -If quickstart fails: - -```bash -# Create new profile -databricks auth login --host https://your-workspace.cloud.databricks.com - -# Verify -databricks auth profiles -``` - -Then manually create `.env` (copy from `.env.example`): -```bash -# Authentication (choose one method) -DATABRICKS_CONFIG_PROFILE=DEFAULT -# DATABRICKS_HOST=https://.databricks.com -# DATABRICKS_TOKEN=dapi.... - -# MLflow configuration -MLFLOW_EXPERIMENT_ID= -MLFLOW_TRACKING_URI="databricks://DEFAULT" -MLFLOW_REGISTRY_URI="databricks-uc" - -# Frontend proxy settings -CHAT_APP_PORT=3000 -CHAT_PROXY_TIMEOUT_SECONDS=300 -``` - -## Next Steps - -After quickstart completes: -1. Run `uv run discover-tools` to find available workspace resources (see **discover-tools** skill) -2. Run `uv run start-app` to test locally (see **run-locally** skill) diff --git a/agent-supervisor-api/.claude/skills/run-locally/SKILL.md b/agent-supervisor-api/.claude/skills/run-locally/SKILL.md deleted file mode 100644 index 3eb83c82..00000000 --- a/agent-supervisor-api/.claude/skills/run-locally/SKILL.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -name: run-locally -description: "Run and test the agent locally. 
Use when: (1) User says 'run locally', 'start server', 'test agent', or 'localhost', (2) Need curl commands to test API, (3) Troubleshooting local development issues, (4) Configuring server options like port or hot-reload." ---- - -# Run Agent Locally - -## Start the Server - -```bash -uv run start-app -``` - -This starts the agent at http://localhost:8000 - -## Server Options - -```bash -# Hot-reload on code changes (development) -uv run start-server --reload - -# Custom port -uv run start-server --port 8001 - -# Multiple workers (production-like) -uv run start-server --workers 4 - -# Combine options -uv run start-server --reload --port 8001 -``` - -## Test the API - -**Streaming request:** -```bash -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' -``` - -**Non-streaming request:** -```bash -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ "input": [{ "role": "user", "content": "hi" }] }' -``` - -## Run Evaluation - -```bash -uv run agent-evaluate -``` - -Uses MLflow scorers (RelevanceToQuery, Safety). - -## Run Unit Tests - -```bash -pytest [path] -``` - -## Troubleshooting - -| Issue | Solution | -|-------|----------| -| **Port already in use** | Use `--port 8001` or kill existing process | -| **Authentication errors** | Verify `.env` is correct; run **quickstart** skill | -| **Module not found** | Run `uv sync` to install dependencies | -| **MLflow experiment not found** | Ensure `MLFLOW_TRACKING_URI` in `.env` is `databricks://` | - -### MLflow Experiment Not Found - -If you see: "The provided MLFLOW_EXPERIMENT_ID environment variable value does not exist" - -**Verify the experiment exists:** -```bash -databricks -p experiments get-experiment -``` - -**Fix:** Ensure `.env` has the correct tracking URI format: -```bash -MLFLOW_TRACKING_URI="databricks://DEFAULT" # Include profile name -``` - -The quickstart script configures this automatically. If you manually edited `.env`, ensure the profile name is included. - -## Next Steps - -- Modify your agent: see **modify-agent** skill -- Deploy to Databricks: see **deploy** skill diff --git a/agent-supervisor-api/AGENTS.md b/agent-supervisor-api/AGENTS.md deleted file mode 100644 index 78935cc5..00000000 --- a/agent-supervisor-api/AGENTS.md +++ /dev/null @@ -1,115 +0,0 @@ -# Agent Development Guide - -## MANDATORY First Actions - -**Ask the user interactively:** - -1. **App deployment target:** - > "Do you have an existing Databricks app you want to deploy to, or should we create a new one? If existing, what's the app name?" - - *Note: New apps should use the `agent-*` prefix (e.g., `agent-data-analyst`) unless the user specifies otherwise.* - -**Then check authentication and profile configuration:** - -1. Read the `.env` file to find `DATABRICKS_CONFIG_PROFILE` (e.g., `dev`) -2. Run `databricks auth profiles` to verify the profile is configured and valid - -**CRITICAL: All `databricks` CLI commands must include the profile from `.env`.** Either use `--profile` or set the env var: - -```bash -databricks --profile -# or -DATABRICKS_CONFIG_PROFILE= databricks -``` - -If no profiles exist or `.env` is missing, guide the user through running `uv run quickstart` to set up authentication and configuration. - -## Understanding User Goals - -**Ask the user questions to understand what they're building:** - -1. 
**What is the agent's purpose?** (e.g., data analyst assistant, customer support, code helper) -2. **What data or tools does it need access to?** - - Unity Catalog functions (SQL UDFs, Python UDFs) - - Genie Spaces for natural language data queries - - Agent endpoints for specialized sub-agents - - External MCP servers via UC connections - -Use `uv run discover-tools` to show available resources in their workspace, then help them select the right ones. - -## Hosted Tool Types - -The Supervisor API supports these tool types. Each is specified in the `TOOLS` list in `agent_server/agent.py`: - -| Type | Description | Required keys | -|---|---|---| -| `uc_function` | Calls a UC function (SQL or Python UDF) | `name`, `name_alias`, `description` | -| `genie` | Queries a Genie space to answer data questions | `name`, `description`, `space_id` | -| `agent_endpoint` | Delegates to an existing agent endpoint | `name`, `description`, `endpoint_name` | -| `mcp` | Connects to an external MCP server via a UC connection | `name`, `description`, `connection_name` | - -**For each tool added**, also add the corresponding resource permission in `databricks.yml`. See the **add-tools** skill for examples. - -## Handling Deployment Errors - -**If `databricks bundle deploy` fails with "An app with the same name already exists":** - -Ask the user: "I see there's an existing app with the same name. Would you like me to bind it to this bundle so we can manage it, or delete it and create a new one?" - -- **If they want to bind**: See the **deploy** skill for binding steps -- **If they want to delete**: Run `databricks apps delete ` then deploy again - ---- - -## Available Skills - -**Before executing any task, read the relevant skill file in `.claude/skills/`** - they contain tested commands, patterns, and troubleshooting steps. - -| Task | Skill | Path | -|------|-------|------| -| Setup, auth, first-time | **quickstart** | `.claude/skills/quickstart/SKILL.md` | -| Find tools/resources | **discover-tools** | `.claude/skills/discover-tools/SKILL.md` | -| Deploy to Databricks | **deploy** | `.claude/skills/deploy/SKILL.md` | -| Add tools & permissions | **add-tools** | `.claude/skills/add-tools/SKILL.md` | -| Run/test locally | **run-locally** | `.claude/skills/run-locally/SKILL.md` | -| Modify agent code | **modify-agent** | `.claude/skills/modify-agent/SKILL.md` | - ---- - -## Quick Commands - -| Task | Command | -|------|---------| -| Setup | `uv run quickstart` | -| Discover tools | `uv run discover-tools` | -| Run locally | `uv run start-app` | -| Deploy | `databricks bundle deploy && databricks bundle run agent_supervisor_api` | -| View logs | `databricks apps logs --follow` | - ---- - -## Key Files - -| File | Purpose | -|------|---------| -| `agent_server/agent.py` | Model, tools list, invoke/stream handlers | -| `agent_server/start_server.py` | FastAPI server + MLflow setup | -| `databricks.yml` | Bundle config & resource permissions | -| `scripts/quickstart.py` | One-command setup script | -| `scripts/discover_tools.py` | Discovers available workspace resources | - ---- - -## Agent Framework Capabilities - -> **⚠️ IMPORTANT:** When adding any tool to the agent, you MUST also grant permissions in `databricks.yml`. See the **add-tools** skill for required steps and examples. - -**Key difference from other templates**: This template offloads the agent loop to Databricks via the Supervisor API. 
You do not need to implement tool execution logic in Python — just declare hosted tools and Databricks handles the rest. - -**Common Patterns:** -- **Structured data retrieval** - Use `genie` tool type to query SQL tables/databases -- **Code interpreter** - Use `uc_function` with `system.ai.python_exec` for Python execution -- **Sub-agent delegation** - Use `agent_endpoint` to call specialized agents -- **External services** - Use `mcp` with a UC connection for external MCP servers - -Reference: https://docs.databricks.com/aws/en/generative-ai/agent-bricks/supervisor-api.html diff --git a/agent-supervisor-api/CLAUDE.md b/agent-supervisor-api/CLAUDE.md deleted file mode 100644 index 43c994c2..00000000 --- a/agent-supervisor-api/CLAUDE.md +++ /dev/null @@ -1 +0,0 @@ -@AGENTS.md diff --git a/agent-supervisor-api/README.md b/agent-supervisor-api/README.md deleted file mode 100644 index 2dfc1322..00000000 --- a/agent-supervisor-api/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Agent using Supervisor API - -This template defines a conversational agent app that uses the [Databricks Supervisor API](https://docs.databricks.com/aws/en/generative-ai/agent-bricks/supervisor-api.html) for server-side tool execution. The app comes with a built-in chat UI, but also exposes an API endpoint for invoking the agent so that you can serve your UI elsewhere. - -Instead of managing an agent loop in application code, this template passes the model, tools, and input to a single Databricks endpoint. Databricks handles tool selection and response synthesis, so your agent code stays minimal. - -The Supervisor API is provider-agnostic: swap the model name (e.g. from `databricks-claude-sonnet-4-5` to `databricks-gpt-5-2`) to change AI providers without modifying tool or agent logic. - -## Requirements - -- AI Gateway (Beta) enabled for your account. See [Manage previews](https://docs.databricks.com/aws/en/admin/workspace-settings/manage-previews.html). -- `uv` (Python package manager), `nvm` (Node version manager), and the Databricks CLI installed locally. - -> **Note**: The Supervisor API is routed through AI Gateway at `/mlflow/v1/responses`, not through the standard model serving endpoint at `/serving-endpoints`. The template's `_get_client()` helper points `DatabricksOpenAI` at the correct AI Gateway base URL, derived from your workspace host and ID (see "How the client is configured" below). - -## Build with AI Assistance - -We recommend using AI coding assistants (Claude Code, Cursor, GitHub Copilot) to customize and deploy this template. Agent Skills in `.claude/skills/` provide step-by-step guidance for common tasks like setup, adding tools, and deployment. These skills are automatically detected by Claude, Cursor, and GitHub Copilot. - -## Quick start - -Run the `uv run quickstart` script to quickly set up your local environment and start the agent server. If you run into issues at any step, refer to the manual local development loop setup below. - -This script will: - -1. Verify uv, nvm, and Databricks CLI installations -2. Configure Databricks authentication -3. Configure agent tracing by creating and linking an MLflow experiment to your app -4. Start the agent server and chat app - -```bash -uv run quickstart -``` - -After the setup is complete, you can start the agent server and the chat app locally with: - -```bash -uv run start-app -``` - -This will start the agent server and the chat app at http://localhost:8000. - -**Next steps**: see [modifying your agent](#modifying-your-agent) to customize and iterate on the agent code.
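For a quick smoke test of the agent's API endpoint (independent of the chat UI), a minimal Python sketch — assuming the `requests` package and the locally running server — is:

```python
# Sketch: invoke the locally running agent's API endpoint directly.
import requests

resp = requests.post(
    "http://localhost:8000/invocations",
    json={"input": [{"role": "user", "content": "What data can you query?"}]},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())
```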
- -## Modifying your agent - -The key files for customizing this template: - -| File | Purpose | -|---|---| -| `agent_server/agent.py` | Agent logic: model, tools, handlers | -| `databricks.yml` | Bundle config and resource permissions | - -### Changing the model - -Edit the `MODEL` variable in `agent_server/agent.py`: - -```python -MODEL = "databricks-gpt-5-2" # switch to any Databricks-hosted model -``` - -No other code changes are needed — the Supervisor API handles the rest. - -### Adding hosted tools - -Edit the `TOOLS` list in `agent_server/agent.py`. Supported tool types: `uc_function`, `genie`, `agent_endpoint`, `mcp`. For each tool you add, grant the corresponding permission in `databricks.yml`. See the `add-tools` skill for examples. - -## How the client is configured - -`DatabricksOpenAI` defaults to `{host}/serving-endpoints` as its base URL. The Supervisor API is served by AI Gateway at a **dedicated subdomain**, not the workspace host: - -``` -https://.ai-gateway./mlflow/v1/responses -``` - -The `_ai_gateway_base_url()` helper in `agent_server/agent.py` derives this URL automatically from the workspace host and ID: - -```python -def _ai_gateway_base_url(wc: WorkspaceClient) -> str: - host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - return f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" -``` - -Authentication is handled automatically by the `WorkspaceClient` using your configured Databricks CLI credentials. - -## Deploying to Databricks Apps - -```bash -databricks bundle deploy && databricks bundle run agent_supervisor_api -``` - -After the first deployment, the app URL is printed. Subsequent deployments update the existing app in place. - -## Running tests - -```bash -uv run pytest tests/ -v -``` - -Unit tests run without credentials. Integration tests against the live Supervisor API require setting `ENG_ML_INFERENCE_TOKEN` (or `DATABRICKS_TOKEN` pointed at a workspace with AI Gateway enabled): - -```bash -ENG_ML_INFERENCE_TOKEN=dapi... uv run pytest tests/ -v -``` diff --git a/agent-supervisor-api/agent_server/__init__.py b/agent-supervisor-api/agent_server/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agent-supervisor-api/agent_server/agent.py b/agent-supervisor-api/agent_server/agent.py deleted file mode 100644 index eb5eacfa..00000000 --- a/agent-supervisor-api/agent_server/agent.py +++ /dev/null @@ -1,90 +0,0 @@ -import logging -import re -from typing import AsyncGenerator - -import mlflow -from databricks.sdk import WorkspaceClient -from databricks_openai import DatabricksOpenAI -from mlflow.genai.agent_server import invoke, stream -from mlflow.types.responses import ( - ResponsesAgentRequest, - ResponsesAgentResponse, - ResponsesAgentStreamEvent, -) - -from agent_server.utils import get_session_id - -mlflow.openai.autolog() -logging.getLogger("mlflow.utils.autologging_utils").setLevel(logging.ERROR) - -# Model name controls which AI provider runs the agent loop. -# Swap to any Databricks-hosted model without changing your tool or agent code. -MODEL = "databricks-claude-opus-4-6" - -# Hosted tools — the Supervisor API runs the tool-selection and synthesis loop -# server-side. Add or remove tool definitions here to change agent behavior. 
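# Illustrative example of a second hosted tool type — an assumption about the exact
# schema, mirroring the shape of the genie entry below; see AGENTS.md and the
# add-tools skill for the required keys of uc_function, agent_endpoint, and mcp tools:
# {
#     "type": "uc_function",
#     "uc_function": {
#         "name": "system.ai.python_exec",
#         "name_alias": "python_exec",
#         "description": "Executes Python code for calculations.",
#     },
# },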
-TOOLS = [ - { - "type": "genie", - "genie": { - "name": "nyc-taxi-space", - "description": "Information about NYC Taxi spaces", - "space_id": "01f07892cf3118edad0a4054bcd25122", - }, - } -] - - -def _ai_gateway_base_url(wc: WorkspaceClient) -> str: - """Derive the AI Gateway base URL from the workspace host and workspace ID. - - The Supervisor API is served by AI Gateway at a dedicated subdomain: - https://.ai-gateway./mlflow/v1 - - This is distinct from the workspace host (used for /serving-endpoints). - """ - host = wc.config.host # e.g. https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - # Strip the first subdomain and replace with .ai-gateway - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - return f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - - -_EXTRA_HEADERS = {"x-databricks-traffic-id": "testenv://liteswap/mas-arv"} - - -def _get_client() -> DatabricksOpenAI: - wc = WorkspaceClient() - return DatabricksOpenAI( - workspace_client=wc, - base_url=_ai_gateway_base_url(wc), - ) - - -@invoke() -def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - if session_id := get_session_id(request): - mlflow.update_current_trace(metadata={"mlflow.trace.session": session_id}) - response = _get_client().responses.create( - model=MODEL, - input=[i.model_dump() for i in request.input], - tools=TOOLS, - stream=False, - extra_headers=_EXTRA_HEADERS, - ) - return ResponsesAgentResponse(output=[item.model_dump() for item in response.output]) - - -@stream() -def stream_handler( - request: ResponsesAgentRequest, -) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: - if session_id := get_session_id(request): - mlflow.update_current_trace(metadata={"mlflow.trace.session": session_id}) - return _get_client().responses.create( - model=MODEL, - input=[i.model_dump() for i in request.input], - tools=TOOLS, - stream=True, - extra_headers=_EXTRA_HEADERS, - ) diff --git a/agent-supervisor-api/agent_server/evaluate_agent.py b/agent-supervisor-api/agent_server/evaluate_agent.py deleted file mode 100644 index b816ef4b..00000000 --- a/agent-supervisor-api/agent_server/evaluate_agent.py +++ /dev/null @@ -1,100 +0,0 @@ -import asyncio -import logging - -import mlflow -from dotenv import load_dotenv -from mlflow.genai.agent_server import get_invoke_function -from mlflow.genai.scorers import ( - Completeness, - ConversationalSafety, - ConversationCompleteness, - Fluency, - KnowledgeRetention, - RelevanceToQuery, - Safety, - ToolCallCorrectness, - UserFrustration, -) -from mlflow.genai.simulators import ConversationSimulator -from mlflow.types.responses import ResponsesAgentRequest - -# Load environment variables from .env if it exists -load_dotenv(dotenv_path=".env", override=True) -logging.getLogger("mlflow.utils.autologging_utils").setLevel(logging.ERROR) - -# need to import agent for our @invoke-registered function to be found -from agent_server import agent # noqa: F401 - -# Create your evaluation dataset -# Refer to documentation for evaluations: -# Scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/concepts/scorers -# Predefined LLM scorers: https://mlflow.org/docs/latest/genai/eval-monitor/scorers/llm-judge/predefined -# Defining custom scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/custom-scorers -test_cases = [ - { - "goal": "Learn about the main dishes of Vietnamese cuisine", - "persona": "An impatient foodie who doesn't know much about Vietnamese cuisine.", - "simulation_guidelines": [ - 
"Initially explore the main influences of Vietnamese cuisine before the main dishes.", - ], - }, - { - "goal": "Figure out which prime numbers between 1 and 50 are also Fibonacci numbers", - "persona": "You are a math novice who has heard of prime numbers but doesn't know what Fibonacci numbers are.", - "simulation_guidelines": [ - "Initially ask questions to understand the Fibonacci sequence before exploring which ones are prime.", - "Prefer short messages", - ], - }, -] - -simulator = ConversationSimulator( - test_cases=test_cases, - max_turns=5, - user_model="databricks:/databricks-claude-sonnet-4-5", -) - -# Get the invoke function that was registered via @invoke decorator in your agent -invoke_fn = get_invoke_function() -assert invoke_fn is not None, ( - "No function registered with the `@invoke` decorator found." - "Ensure you have a function decorated with `@invoke()`." -) - -# if invoke function is async, wrap it in a sync function. -# The simulator may already be running an event loop, so we use nest_asyncio -# to allow nested run_until_complete() calls without deadlocking. -if asyncio.iscoroutinefunction(invoke_fn): - import nest_asyncio - - nest_asyncio.apply() - - def predict_fn(input: list[dict], **kwargs) -> dict: - req = ResponsesAgentRequest(input=input) - loop = asyncio.get_event_loop() - response = loop.run_until_complete(invoke_fn(req)) - return response.model_dump() -else: - - def predict_fn(input: list[dict], **kwargs) -> dict: - req = ResponsesAgentRequest(input=input) - response = invoke_fn(req) - return response.model_dump() - - -def evaluate(): - mlflow.genai.evaluate( - data=simulator, - predict_fn=predict_fn, - scorers=[ - Completeness(), - ConversationCompleteness(), - ConversationalSafety(), - KnowledgeRetention(), - UserFrustration(), - Fluency(), - RelevanceToQuery(), - Safety(), - ToolCallCorrectness(), - ], - ) diff --git a/agent-supervisor-api/agent_server/start_server.py b/agent-supervisor-api/agent_server/start_server.py deleted file mode 100644 index 1d5ecd0c..00000000 --- a/agent-supervisor-api/agent_server/start_server.py +++ /dev/null @@ -1,17 +0,0 @@ -from dotenv import load_dotenv -from mlflow.genai.agent_server import AgentServer, setup_mlflow_git_based_version_tracking - -# Load env vars from .env before importing the agent for proper auth -load_dotenv(dotenv_path=".env", override=True) - -# Need to import the agent to register the functions with the server -import agent_server.agent # noqa: E402 - -agent_server = AgentServer("ResponsesAgent", enable_chat_proxy=True) -# Define the app as a module level variable to enable multiple workers -app = agent_server.app # noqa: F841 -setup_mlflow_git_based_version_tracking() - - -def main(): - agent_server.run(app_import_string="agent_server.start_server:app") diff --git a/agent-supervisor-api/agent_server/utils.py b/agent-supervisor-api/agent_server/utils.py deleted file mode 100644 index 4fb82302..00000000 --- a/agent-supervisor-api/agent_server/utils.py +++ /dev/null @@ -1,9 +0,0 @@ -from mlflow.types.responses import ResponsesAgentRequest - - -def get_session_id(request: ResponsesAgentRequest) -> str | None: - if request.context and request.context.conversation_id: - return request.context.conversation_id - if request.custom_inputs and isinstance(request.custom_inputs, dict): - return request.custom_inputs.get("session_id") - return None diff --git a/agent-supervisor-api/app.yaml b/agent-supervisor-api/app.yaml deleted file mode 100644 index 34465373..00000000 --- a/agent-supervisor-api/app.yaml +++ 
/dev/null @@ -1,16 +0,0 @@ -command: ["uv", "run", "start-app"] -# databricks apps listen by default on port 8000 - -env: - - name: MLFLOW_TRACKING_URI - value: "databricks" - - name: MLFLOW_REGISTRY_URI - value: "databricks-uc" - - name: API_PROXY - value: "http://localhost:8000/invocations" - - name: CHAT_APP_PORT - value: "3000" - - name: CHAT_PROXY_TIMEOUT_SECONDS - value: "300" - - name: MLFLOW_EXPERIMENT_ID - valueFrom: "experiment" diff --git a/agent-supervisor-api/databricks.yml b/agent-supervisor-api/databricks.yml deleted file mode 100644 index cb0f304e..00000000 --- a/agent-supervisor-api/databricks.yml +++ /dev/null @@ -1,51 +0,0 @@ -bundle: - name: agent_supervisor_api - -resources: - apps: - agent_supervisor_api: - name: "agent-supervisor-api" - description: "Supervisor API agent application" - source_code_path: ./ - config: - command: ["uv", "run", "start-app"] - env: - - name: MLFLOW_TRACKING_URI - value: "databricks" - - name: MLFLOW_REGISTRY_URI - value: "databricks-uc" - - name: API_PROXY - value: "http://localhost:8000/invocations" - - name: CHAT_APP_PORT - value: "3000" - - name: CHAT_PROXY_TIMEOUT_SECONDS - value: "300" - - name: MLFLOW_EXPERIMENT_ID - value_from: "experiment" - - # Resources which this app has access to - resources: - - name: 'experiment' - experiment: - experiment_id: "" - permission: 'CAN_MANAGE' - - name: 'python_exec' - unity_catalog_function: - function_full_name: "system.ai.python_exec" - permission: 'EXECUTE' - -targets: - dev: - mode: development - default: true - # workspace: - # host: https://... - - prod: - mode: production - # workspace: - # host: https://... - resources: - apps: - agent_supervisor_api: - name: agent-supervisor-api diff --git a/agent-supervisor-api/pyproject.toml b/agent-supervisor-api/pyproject.toml deleted file mode 100644 index 037cf786..00000000 --- a/agent-supervisor-api/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[project] -name = "agent-server" -version = "0.1.0" -description = "MLflow-compatible agent server using the Databricks Supervisor API" -readme = "README.md" -authors = [ - { name = "Agent Developer", email = "developer@example.com" } -] -requires-python = ">=3.11" -dependencies = [ - "fastapi>=0.129.0", - "uvicorn>=0.41.0", - "databricks-openai>=0.9.0", - "databricks-sdk>=0.55.0", - "databricks-agents>=1.9.3", - "mlflow>=3.10.0", - "python-dotenv>=1.2.1", -] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[dependency-groups] -dev = [ - "hatchling>=1.28.0", - "pytest>=9.0.2", -] - - -[project.scripts] -quickstart = "scripts.quickstart:main" -start-app = "scripts.start_app:main" -start-server = "agent_server.start_server:main" -agent-evaluate = "agent_server.evaluate_agent:evaluate" -discover-tools = "scripts.discover_tools:main" diff --git a/agent-supervisor-api/requirements.txt b/agent-supervisor-api/requirements.txt deleted file mode 100644 index 60cc5e6a..00000000 --- a/agent-supervisor-api/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -uv diff --git a/agent-supervisor-api/scripts/__init__.py b/agent-supervisor-api/scripts/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agent-supervisor-api/scripts/discover_tools.py b/agent-supervisor-api/scripts/discover_tools.py deleted file mode 100755 index 3eb37963..00000000 --- a/agent-supervisor-api/scripts/discover_tools.py +++ /dev/null @@ -1,432 +0,0 @@ -#!/usr/bin/env python3 -""" -Discover available tools and data sources for Databricks agents. 
- -This script scans for: -- Unity Catalog functions (data retrieval tools e.g. SQL UDFs) -- Unity Catalog tables (data sources) -- Vector search indexes (RAG data sources) -- Genie spaces (conversational interface over structured data) -- Custom MCP servers (Databricks apps with name mcp-*) -- External MCP servers (via Unity Catalog connections) -""" - -import json -import subprocess -import sys -from pathlib import Path -from typing import Any, Dict, List - -from databricks.sdk import WorkspaceClient - -DEFAULT_MAX_RESULTS = 100 -DEFAULT_MAX_SCHEMAS = 25 - -def run_databricks_cli(args: List[str]) -> str: - """Run databricks CLI command and return output.""" - try: - result = subprocess.run( - ["databricks"] + args, - capture_output=True, - text=True, - check=True, - ) - return result.stdout - except subprocess.CalledProcessError as e: - print(f"Error running databricks CLI: {e.stderr}", file=sys.stderr) - return "" - - -def discover_uc_functions(w: WorkspaceClient, catalog: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: - """Discover Unity Catalog functions that could be used as tools. - - Args: - w: WorkspaceClient instance - catalog: Optional specific catalog to search - max_schemas: Total number of schemas to search across all catalogs - """ - functions = [] - schemas_searched = 0 - - try: - catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] - - for cat in catalogs: - if schemas_searched >= max_schemas: - break - - try: - all_schemas = list(w.schemas.list(catalog_name=cat)) - # Take schemas from this catalog until we hit the global budget - schemas_to_search = all_schemas[:max_schemas - schemas_searched] - - for schema in schemas_to_search: - schema_name = f"{cat}.{schema.name}" - try: - funcs = list(w.functions.list(catalog_name=cat, schema_name=schema.name)) - for func in funcs: - functions.append({ - "type": "uc_function", - "name": func.full_name, - "catalog": cat, - "schema": schema.name, - "function_name": func.name, - "comment": func.comment, - "routine_definition": getattr(func, "routine_definition", None), - }) - except Exception as e: - # Skip schemas we can't access - continue - finally: - schemas_searched += 1 - except Exception as e: - # Skip catalogs we can't access - continue - - except Exception as e: - print(f"Error discovering UC functions: {e}", file=sys.stderr) - - return functions - - -def discover_uc_tables(w: WorkspaceClient, catalog: str = None, schema: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: - """Discover Unity Catalog tables that could be queried. 
- - Args: - w: WorkspaceClient instance - catalog: Optional specific catalog to search - schema: Optional specific schema to search (requires catalog) - max_schemas: Total number of schemas to search across all catalogs - """ - tables = [] - schemas_searched = 0 - - try: - catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] - - for cat in catalogs: - if cat in ["__databricks_internal", "system"]: - continue - - if schemas_searched >= max_schemas: - break - - try: - if schema: - schemas_to_search = [schema] - else: - all_schemas = [s.name for s in w.schemas.list(catalog_name=cat)] - # Take schemas from this catalog until we hit the global budget - schemas_to_search = all_schemas[:max_schemas - schemas_searched] - - for sch in schemas_to_search: - if sch == "information_schema": - schemas_searched += 1 - continue - - try: - tbls = list(w.tables.list(catalog_name=cat, schema_name=sch)) - for tbl in tbls: - # Get column info - columns = [] - if hasattr(tbl, "columns") and tbl.columns: - columns = [ - {"name": col.name, "type": col.type_name.value if hasattr(col.type_name, "value") else str(col.type_name)} - for col in tbl.columns - ] - - tables.append({ - "type": "uc_table", - "name": tbl.full_name, - "catalog": cat, - "schema": sch, - "table_name": tbl.name, - "table_type": tbl.table_type.value if tbl.table_type else None, - "comment": tbl.comment, - "columns": columns, - }) - except Exception as e: - # Skip schemas we can't access - pass - finally: - schemas_searched += 1 - except Exception as e: - # Skip catalogs we can't access - continue - - except Exception as e: - print(f"Error discovering UC tables: {e}", file=sys.stderr) - - return tables - - -def discover_vector_search_indexes(w: WorkspaceClient) -> List[Dict[str, Any]]: - """Discover Vector Search indexes for RAG applications.""" - indexes = [] - - try: - # List all vector search endpoints - endpoints = list(w.vector_search_endpoints.list_endpoints()) - - for endpoint in endpoints: - try: - # List indexes for each endpoint - endpoint_indexes = list(w.vector_search_indexes.list_indexes(endpoint_name=endpoint.name)) - for idx in endpoint_indexes: - indexes.append({ - "type": "vector_search_index", - "name": idx.name, - "endpoint": endpoint.name, - "primary_key": idx.primary_key, - "index_type": idx.index_type.value if idx.index_type else None, - "status": idx.status.state.value if idx.status and idx.status.state else None, - }) - except Exception as e: - # Skip endpoints we can't access - continue - - except Exception as e: - print(f"Error discovering vector search indexes: {e}", file=sys.stderr) - - return indexes - - -def discover_genie_spaces(w: WorkspaceClient) -> List[Dict[str, Any]]: - """Discover Genie spaces for conversational data access.""" - spaces = [] - - try: - # Use SDK to list genie spaces - response = w.genie.list_spaces() - genie_spaces = response.spaces if hasattr(response, "spaces") else [] - for space in genie_spaces: - spaces.append({ - "type": "genie_space", - "id": space.space_id, - "name": space.title, - "description": space.description, - }) - except Exception as e: - print(f"Error discovering Genie spaces: {e}", file=sys.stderr) - - return spaces - - - -def discover_custom_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: - """Discover custom MCP servers deployed as Databricks apps.""" - custom_servers = [] - - try: - # List all apps and filter for those starting with mcp- - apps = w.apps.list() - for app in apps: - if app.name and app.name.startswith("mcp-"): - 
custom_servers.append({ - "type": "custom_mcp_server", - "name": app.name, - "url": app.url, - "status": app.app_status.state.value if app.app_status and app.app_status.state else None, - "description": app.description, - }) - except Exception as e: - print(f"Error discovering custom MCP servers: {e}", file=sys.stderr) - - return custom_servers - - -def discover_external_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: - """Discover external MCP servers configured via Unity Catalog connections.""" - external_servers = [] - - try: - # List all connections and filter for MCP connections - connections = w.connections.list() - for conn in connections: - # Check if this is an MCP connection - if conn.options and conn.options.get("is_mcp_connection") == "true": - external_servers.append({ - "type": "external_mcp_server", - "name": conn.name, - "connection_type": conn.connection_type.value if hasattr(conn.connection_type, "value") else str(conn.connection_type), - "comment": conn.comment, - "full_name": conn.full_name, - }) - except Exception as e: - print(f"Error discovering external MCP servers: {e}", file=sys.stderr) - - return external_servers - - -def format_output_markdown(results: Dict[str, List[Dict[str, Any]]]) -> str: - """Format discovery results as markdown.""" - lines = ["# Agent Tools and Data Sources Discovery\n"] - - # UC Functions - functions = results.get("uc_functions", []) - if functions: - lines.append(f"## Unity Catalog Functions ({len(functions)})\n") - lines.append("**What they are:** SQL UDFs that can be used as agent tools.\n") - lines.append("**How to use:** Access via UC functions MCP server:") - lines.append("- All functions in a schema: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}`") - lines.append("- Single function: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}/{function_name}`\n") - for func in functions[:10]: # Show first 10 - lines.append(f"- `{func['name']}`") - if func.get("comment"): - lines.append(f" - {func['comment']}") - if len(functions) > 10: - lines.append(f"\n*...and {len(functions) - 10} more*\n") - lines.append("") - - # UC Tables - tables = results.get("uc_tables", []) - if tables: - lines.append(f"## Unity Catalog Tables ({len(tables)})\n") - lines.append("Structured data that agents can query via UC SQL functions.\n") - for table in tables[:10]: # Show first 10 - lines.append(f"- `{table['name']}` ({table['table_type']})") - if table.get("comment"): - lines.append(f" - {table['comment']}") - if table.get("columns"): - col_names = [c["name"] for c in table["columns"][:5]] - lines.append(f" - Columns: {', '.join(col_names)}") - if len(tables) > 10: - lines.append(f"\n*...and {len(tables) - 10} more*\n") - lines.append("") - - # Vector Search Indexes - indexes = results.get("vector_search_indexes", []) - if indexes: - lines.append(f"## Vector Search Indexes ({len(indexes)})\n") - lines.append("These can be used for RAG applications with unstructured data.\n") - lines.append("**How to use:** Connect via MCP server at `{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}` or\n") - lines.append("`{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index_name}`\n") - for idx in indexes: - lines.append(f"- `{idx['name']}`") - lines.append(f" - Endpoint: {idx['endpoint']}") - lines.append(f" - Status: {idx['status']}") - lines.append("") - - # Genie Spaces - spaces = results.get("genie_spaces", []) - if spaces: - lines.append(f"## Genie Spaces ({len(spaces)})\n") - lines.append("**What they are:** 
Natural language interface to your data\n") - lines.append("**How to use:** Connect via Genie MCP server at `{workspace_host}/api/2.0/mcp/genie/{space_id}`\n") - for space in spaces: - lines.append(f"- `{space['name']}` (ID: {space['id']})") - if space.get("description"): - lines.append(f" - {space['description']}") - lines.append("") - - # Custom MCP Servers (Databricks Apps) - custom_servers = results.get("custom_mcp_servers", []) - if custom_servers: - lines.append(f"## Custom MCP Servers ({len(custom_servers)})\n") - lines.append("**What:** Your own MCP servers deployed as Databricks Apps (names starting with mcp-)\n") - lines.append("**How to use:** Access via `{app_url}/mcp`\n") - lines.append("**⚠️ Important:** Custom MCP server apps require manual permission grants:") - lines.append("1. Get your agent app's service principal: `databricks apps get --output json | jq -r '.service_principal_name'`") - lines.append("2. Grant permission: `databricks apps update-permissions --service-principal --permission-level CAN_USE`") - lines.append("(Apps are not yet supported as resource dependencies in databricks.yml)\n") - for server in custom_servers: - lines.append(f"- `{server['name']}`") - if server.get("url"): - lines.append(f" - URL: {server['url']}") - if server.get("status"): - lines.append(f" - Status: {server['status']}") - if server.get("description"): - lines.append(f" - {server['description']}") - lines.append("") - - # External MCP Servers (UC Connections) - external_servers = results.get("external_mcp_servers", []) - if external_servers: - lines.append(f"## External MCP Servers ({len(external_servers)})\n") - lines.append("**What:** Third-party MCP servers via Unity Catalog connections\n") - lines.append("**How to use:** Connect via `{workspace_host}/api/2.0/mcp/external/{connection_name}`\n") - lines.append("**Benefits:** Secure access to external APIs through UC governance\n") - for server in external_servers: - lines.append(f"- `{server['name']}`") - if server.get("full_name"): - lines.append(f" - Full name: {server['full_name']}") - if server.get("comment"): - lines.append(f" - {server['comment']}") - lines.append("") - return "\n".join(lines) - - -def main(): - """Main discovery function.""" - import argparse - - parser = argparse.ArgumentParser(description="Discover available agent tools and data sources") - parser.add_argument("--catalog", help="Limit discovery to specific catalog") - parser.add_argument("--schema", help="Limit discovery to specific schema (requires --catalog)") - parser.add_argument("--format", choices=["json", "markdown"], default="markdown", help="Output format") - parser.add_argument("--output", help="Output file (default: stdout)") - parser.add_argument("--profile", help="Databricks CLI profile to use (default: uses default profile)") - parser.add_argument("--max-results", type=int, default=DEFAULT_MAX_RESULTS, help=f"Maximum results per resource type (default: {DEFAULT_MAX_RESULTS})") - parser.add_argument("--max-schemas", type=int, default=DEFAULT_MAX_SCHEMAS, help=f"Total schemas to search across all catalogs (default: {DEFAULT_MAX_SCHEMAS})") - - args = parser.parse_args() - - if args.schema and not args.catalog: - print("Error: --schema requires --catalog", file=sys.stderr) - sys.exit(1) - - print("Discovering available tools and data sources...", file=sys.stderr) - - # Initialize Databricks workspace client - # Only pass profile if specified, otherwise use default - if args.profile: - w = WorkspaceClient(profile=args.profile) - else: - w = 
WorkspaceClient() - - results = {} - - # Discover each type with configurable limits - print("- UC Functions...", file=sys.stderr) - results["uc_functions"] = discover_uc_functions(w, catalog=args.catalog, max_schemas=args.max_schemas)[:args.max_results] - - print("- UC Tables...", file=sys.stderr) - results["uc_tables"] = discover_uc_tables(w, catalog=args.catalog, schema=args.schema, max_schemas=args.max_schemas)[:args.max_results] - - print("- Vector Search Indexes...", file=sys.stderr) - results["vector_search_indexes"] = discover_vector_search_indexes(w)[:args.max_results] - - print("- Genie Spaces...", file=sys.stderr) - results["genie_spaces"] = discover_genie_spaces(w)[:args.max_results] - - print("- Custom MCP Servers (Apps)...", file=sys.stderr) - results["custom_mcp_servers"] = discover_custom_mcp_servers(w)[:args.max_results] - - print("- External MCP Servers (Connections)...", file=sys.stderr) - results["external_mcp_servers"] = discover_external_mcp_servers(w)[:args.max_results] - - # Format output - if args.format == "json": - output = json.dumps(results, indent=2) - else: - output = format_output_markdown(results) - - # Write output - if args.output: - Path(args.output).write_text(output) - print(f"\nResults written to {args.output}", file=sys.stderr) - else: - print("\n" + output) - - # Print summary - print("\n=== Discovery Summary ===", file=sys.stderr) - print(f"UC Functions: {len(results['uc_functions'])}", file=sys.stderr) - print(f"UC Tables: {len(results['uc_tables'])}", file=sys.stderr) - print(f"Vector Search Indexes: {len(results['vector_search_indexes'])}", file=sys.stderr) - print(f"Genie Spaces: {len(results['genie_spaces'])}", file=sys.stderr) - print(f"Custom MCP Servers: {len(results['custom_mcp_servers'])}", file=sys.stderr) - print(f"External MCP Servers: {len(results['external_mcp_servers'])}", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/agent-supervisor-api/scripts/quickstart.py b/agent-supervisor-api/scripts/quickstart.py deleted file mode 100644 index 59982e03..00000000 --- a/agent-supervisor-api/scripts/quickstart.py +++ /dev/null @@ -1,768 +0,0 @@ -#!/usr/bin/env python3 -""" -Quickstart setup script for Databricks agent development. 
- -This script handles: -- Checking prerequisites (uv, nvm, Node 20, Databricks CLI) -- Databricks authentication (OAuth) -- MLflow experiment creation -- Environment variable configuration (.env) -- Lakebase instance setup (for memory-enabled templates) - -Usage: - uv run quickstart [OPTIONS] - -Options: - --profile NAME Use specified Databricks profile (non-interactive) - --host URL Databricks workspace URL (for initial setup) - --lakebase NAME Lakebase instance name (for memory features) - -h, --help Show this help message -""" - -import argparse -import json -import os -import platform -import re -import secrets -import shutil -import subprocess -import sys -from pathlib import Path - - -def print_header(text: str) -> None: - """Print a section header.""" - print(f"\n{'=' * 67}") - print(text) - print("=" * 67) - - -def print_step(text: str) -> None: - """Print a step indicator.""" - print(f"\n{text}") - - -def print_success(text: str) -> None: - """Print a success message.""" - print(f"✓ {text}") - - -def print_error(text: str) -> None: - """Print an error message.""" - print(f"✗ {text}", file=sys.stderr) - - -def print_troubleshooting_auth() -> None: - print("\nTroubleshooting tips:") - print(" • Ensure you have network connectivity to your Databricks workspace") - print(" • Try running 'databricks auth login' manually to see detailed errors") - print(" • Check that your workspace URL is correct") - print(" • If using a browser for OAuth, ensure popups are not blocked") - - -def print_troubleshooting_api() -> None: - print("\nTroubleshooting tips:") - print(" • Your authentication token may have expired - try 'databricks auth login' to refresh") - print(" • Verify your profile is valid with 'databricks auth profiles'") - print(" • Check network connectivity to your Databricks workspace") - - -def command_exists(cmd: str) -> bool: - """Check if a command exists in PATH.""" - return shutil.which(cmd) is not None - - -def run_command( - cmd: list[str], - capture_output: bool = True, - check: bool = True, - env: dict = None, - show_output: bool = False, -) -> subprocess.CompletedProcess: - """Run a command and return the result.""" - merged_env = {**os.environ, **(env or {})} - if show_output: - return subprocess.run(cmd, check=check, env=merged_env) - return subprocess.run( - cmd, capture_output=capture_output, text=True, check=check, env=merged_env - ) - - -def get_command_output(cmd: list[str], env: dict = None) -> str: - """Run a command and return its stdout.""" - result = run_command(cmd, env=env) - return result.stdout.strip() - - -def check_prerequisites() -> dict[str, bool]: - """Check which prerequisites are installed.""" - print_step("Checking prerequisites...") - - prereqs = { - "uv": command_exists("uv"), - "node": command_exists("node"), - "npm": command_exists("npm"), - "databricks": command_exists("databricks"), - } - - for name, installed in prereqs.items(): - if installed: - try: - if name == "uv": - version = get_command_output(["uv", "--version"]) - elif name == "node": - version = get_command_output(["node", "--version"]) - elif name == "npm": - version = get_command_output(["npm", "--version"]) - elif name == "databricks": - version = get_command_output(["databricks", "--version"]) - print_success(f"{name} is installed: {version}") - except Exception: - print_success(f"{name} is installed") - else: - print(f" {name} is not installed") - - return prereqs - - -def check_missing_prerequisites(prereqs: dict[str, bool]) -> list[str]: - """Return list of missing 
prerequisites with install instructions.""" - missing = [] - - if not prereqs["uv"]: - missing.append("uv - Install with: curl -LsSf https://astral.sh/uv/install.sh | sh") - - if not prereqs["node"] or not prereqs["npm"]: - missing.append("Node.js 20 - Install with: nvm install 20 (or download from nodejs.org)") - - if not prereqs["databricks"]: - if platform.system() == "Darwin": - missing.append("Databricks CLI - Install with: brew install databricks/tap/databricks") - else: - missing.append( - "Databricks CLI - Install with: curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh" - ) - - if missing: - missing.append( - "Note: These install commands are for Unix/macOS. For Windows, please visit the official documentation for each tool." - ) - - return missing - - -def check_node_version() -> str | None: - """Check if the installed Node.js version meets Vite's requirements. - - Vite requires Node.js >=20.19, >=22.12, or >=23. - Node 21.x is an odd-numbered release and not supported. - - Returns None if the version is OK, or an error string if not. - """ - if not command_exists("node"): - return None # Missing node is handled by check_missing_prerequisites - - try: - version_str = get_command_output(["node", "--version"]) - except Exception: - return None - - match = re.match(r"v(\d+)\.(\d+)\.(\d+)", version_str) - if not match: - return None - - major, minor = int(match.group(1)), int(match.group(2)) - - # Node 21.x is odd-numbered and not a Vite target - if major == 21: - return ( - f"Node.js {version_str} is not supported by Vite (odd-numbered release).\n" - " Please install Node.js 20.19+, 22.12+, or 23+.\n" - " Run: nvm install 22" - ) - - # Check supported version ranges - if major == 20 and minor >= 19: - return None - if major == 22 and minor >= 12: - return None - if major >= 23: - return None - - # Version is too old or unsupported - if major == 20: - return ( - f"Node.js {version_str} is too old for Vite (requires 20.19+).\n" - f" Your version: {version_str}\n" - " Run: nvm install 20 (to get latest 20.x)" - ) - if major == 22: - return ( - f"Node.js {version_str} is too old for Vite (requires 22.12+).\n" - f" Your version: {version_str}\n" - " Run: nvm install 22 (to get latest 22.x)" - ) - - if major < 20: - return ( - f"Node.js {version_str} is too old for Vite (requires 20.19+).\n" - f" Your version: {version_str}\n" - " Run: nvm install 22" - ) - - return ( - f"Node.js {version_str} is not supported by Vite.\n" - " Vite requires Node.js 20.19+, 22.12+, or 23+.\n" - " Run: nvm install 22" - ) - - -def setup_env_file() -> None: - """Copy .env.example to .env if it doesn't exist.""" - print_step("Setting up configuration files...") - - env_local = Path(".env") - env_example = Path(".env.example") - - if env_local.exists(): - print(" .env already exists, skipping copy...") - elif env_example.exists(): - shutil.copy(env_example, env_local) - print_success("Copied .env.example to .env") - else: - # Create a minimal .env - env_local.write_text( - "# Databricks configuration\n" - "DATABRICKS_CONFIG_PROFILE=DEFAULT\n" - "MLFLOW_EXPERIMENT_ID=\n" - 'MLFLOW_TRACKING_URI="databricks"\n' - 'MLFLOW_REGISTRY_URI="databricks-uc"\n' - ) - print_success("Created .env") - - -def update_env_file(key: str, value: str) -> None: - """Update or add a key-value pair in .env.""" - env_file = Path(".env") - - if not env_file.exists(): - env_file.write_text(f"{key}={value}\n") - return - - content = env_file.read_text() - - # Check if key exists (with or without quotes, 
with any value) - pattern = rf"^{re.escape(key)}=.*$" - if re.search(pattern, content, re.MULTILINE): - # Replace existing key - content = re.sub(pattern, f"{key}={value}", content, flags=re.MULTILINE) - else: - # Add new key - if not content.endswith("\n"): - content += "\n" - content += f"{key}={value}\n" - - env_file.write_text(content) - - -def get_databricks_profiles() -> list[dict]: - """Get list of existing Databricks profiles.""" - try: - result = run_command(["databricks", "auth", "profiles"], check=False) - if result.returncode != 0 or not result.stdout.strip(): - return [] - - lines = result.stdout.strip().split("\n") - if len(lines) <= 1: # Only header or empty - return [] - - # Parse the output - first line is header - profiles = [] - for line in lines[1:]: - if line.strip(): - # Profile name is the first column - parts = line.split() - if parts: - profiles.append( - { - "name": parts[0], - "line": line, - } - ) - - return profiles - except Exception: - return [] - - -def validate_profile(profile_name: str) -> bool: - """Test if a Databricks profile is authenticated.""" - try: - env = {"DATABRICKS_CONFIG_PROFILE": profile_name} - result = run_command( - ["databricks", "current-user", "me"], - check=False, - env=env, - ) - return result.returncode == 0 - except Exception: - return False - - -def authenticate_profile(profile_name: str, host: str = None) -> bool: - """Authenticate a Databricks profile.""" - print(f"\nAuthenticating profile '{profile_name}'...") - print("You will be prompted to log in to Databricks in your browser.\n") - - cmd = ["databricks", "auth", "login", "--profile", profile_name] - if host: - cmd.extend(["--host", host]) - - try: - # Run interactively so user can see browser prompt - result = subprocess.run(cmd) - return result.returncode == 0 - except Exception as e: - print_error(f"Authentication failed: {e}") - return False - - -def select_profile_interactive(profiles: list[dict]) -> str: - """Let user select a profile interactively.""" - print("\nFound existing Databricks profiles:\n") - - # Print header and profiles - for i, profile in enumerate(profiles, 1): - print(f" {i}) {profile['line']}") - - print() - - while True: - choice = input("Enter the number of the profile you want to use: ").strip() - if not choice: - print_error("Profile selection is required") - continue - - try: - index = int(choice) - 1 - if 0 <= index < len(profiles): - return profiles[index]["name"] - else: - print_error(f"Please choose a number between 1 and {len(profiles)}") - except ValueError: - print_error("Please enter a valid number") - - -def setup_databricks_auth(profile_arg: str = None, host_arg: str = None) -> str: - """Set up Databricks authentication and return the profile name.""" - print_step("Setting up Databricks authentication...") - - # If profile was specified via CLI, use it directly - if profile_arg: - profile_name = profile_arg - print(f"Using specified profile: {profile_name}") - else: - # Check for existing profiles - profiles = get_databricks_profiles() - - if profiles: - profile_name = select_profile_interactive(profiles) - print(f"\nSelected profile: {profile_name}") - else: - # No profiles exist - need to create one - profile_name = None - - # Validate or authenticate the profile - if profile_name: - if validate_profile(profile_name): - print_success(f"Successfully validated profile '{profile_name}'") - else: - print(f"Profile '{profile_name}' is not authenticated.") - if not authenticate_profile(profile_name): - print_error(f"Failed to authenticate 
profile '{profile_name}'") - print_troubleshooting_auth() - sys.exit(1) - print_success(f"Successfully authenticated profile '{profile_name}'") - else: - # Create new profile - print("No existing profiles found. Setting up Databricks authentication...") - - if host_arg: - host = host_arg - print(f"Using specified host: {host}") - else: - host = input( - "\nPlease enter your Databricks host URL\n(e.g., https://your-workspace.cloud.databricks.com): " - ).strip() - - if not host: - print_error("Databricks host is required") - sys.exit(1) - - profile_name = "DEFAULT" - if not authenticate_profile(profile_name, host): - print_error("Databricks authentication failed") - print_troubleshooting_auth() - sys.exit(1) - print_success(f"Successfully authenticated with Databricks") - - # Update .env with profile - update_env_file("DATABRICKS_CONFIG_PROFILE", profile_name) - update_env_file("MLFLOW_TRACKING_URI", f'"databricks://{profile_name}"') - print_success(f"Databricks profile '{profile_name}' saved to .env") - - return profile_name - - -def get_databricks_host(profile_name: str) -> str: - """Get the Databricks workspace host URL from the profile.""" - try: - result = run_command( - ["databricks", "auth", "env", "--profile", profile_name, "--output", "json"], - check=False, - ) - if result.returncode == 0: - env_data = json.loads(result.stdout) - env_vars = env_data.get("env", {}) - host = env_vars.get("DATABRICKS_HOST", "") - return host.rstrip("/") - except Exception: - pass - return "" - - -def get_databricks_username(profile_name: str) -> str: - """Get the current Databricks username.""" - try: - result = run_command( - ["databricks", "-p", profile_name, "current-user", "me", "--output", "json"] - ) - user_data = json.loads(result.stdout) - return user_data.get("userName", "") - except Exception as e: - print_error(f"Failed to get Databricks username: {e}") - print_troubleshooting_api() - sys.exit(1) - - -def create_mlflow_experiment(profile_name: str, username: str) -> tuple[str, str]: - """Create an MLflow experiment and return (name, id).""" - print_step("Creating MLflow experiment...") - - experiment_name = f"/Users/{username}/agents-on-apps" - - try: - # Try to create with default name - result = run_command( - [ - "databricks", - "-p", - profile_name, - "experiments", - "create-experiment", - experiment_name, - "--output", - "json", - ], - check=False, - ) - - if result.returncode == 0: - experiment_id = json.loads(result.stdout).get("experiment_id", "") - print_success(f"Created experiment '{experiment_name}' with ID: {experiment_id}") - return experiment_name, experiment_id - - # Name already exists, try with random suffix - print("Experiment name already exists, creating with random suffix...") - random_suffix = secrets.token_hex(4) - experiment_name = f"/Users/{username}/agents-on-apps-{random_suffix}" - - result = run_command( - [ - "databricks", - "-p", - profile_name, - "experiments", - "create-experiment", - experiment_name, - "--output", - "json", - ] - ) - experiment_id = json.loads(result.stdout).get("experiment_id", "") - print_success(f"Created experiment '{experiment_name}' with ID: {experiment_id}") - return experiment_name, experiment_id - - except Exception as e: - print_error(f"Failed to create MLflow experiment: {e}") - print_troubleshooting_api() - sys.exit(1) - - -def check_lakebase_required() -> bool: - """Check if databricks.yml has LAKEBASE_INSTANCE_NAME configured.""" - databricks_yml = Path("databricks.yml") - if not databricks_yml.exists(): - return False - - 
content = databricks_yml.read_text() - return "LAKEBASE_INSTANCE_NAME" in content - - -def get_env_value(key: str) -> str: - """Get a value from .env file.""" - env_file = Path(".env") - if not env_file.exists(): - return "" - - content = env_file.read_text() - pattern = rf"^{re.escape(key)}=(.*)$" - match = re.search(pattern, content, re.MULTILINE) - if match: - return match.group(1).strip().strip('"').strip("'") - return "" - - -def validate_lakebase_instance(profile_name: str, lakebase_name: str) -> dict | None: - """Validate that the Lakebase instance exists and user has access. - - Returns the instance info dict on success, None on failure. - """ - print(f"Validating Lakebase instance '{lakebase_name}'...") - - result = run_command( - [ - "databricks", - "-p", - profile_name, - "database", - "get-database-instance", - lakebase_name, - "--output", - "json", - ], - check=False, - ) - - if result.returncode == 0: - print_success(f"Lakebase instance '{lakebase_name}' validated") - return json.loads(result.stdout) - - # Check if database command is not recognized (old CLI version) - if 'unknown command "database" for "databricks"' in (result.stderr or ""): - print_error( - "The 'databricks database' command requires a newer version of the Databricks CLI." - ) - print(" Please upgrade: https://docs.databricks.com/dev-tools/cli/install.html") - return None - - error_msg = result.stderr.lower() if result.stderr else "" - if "not found" in error_msg: - print_error( - f"Lakebase instance '{lakebase_name}' not found. Please check the instance name." - ) - elif "permission" in error_msg or "forbidden" in error_msg or "unauthorized" in error_msg: - print_error(f"No permission to access Lakebase instance '{lakebase_name}'") - else: - print_error( - f"Failed to validate Lakebase instance: {result.stderr.strip() if result.stderr else 'Unknown error'}" - ) - return None - - -def setup_lakebase(profile_name: str, username: str, lakebase_arg: str = None) -> str: - """Set up Lakebase instance for memory features.""" - print_step("Setting up Lakebase instance for memory...") - - lakebase_name = None - - # If --lakebase was provided, use it directly - if lakebase_arg: - lakebase_name = lakebase_arg - print(f"Using provided Lakebase instance: {lakebase_name}") - else: - # Check if already set in .env - existing = get_env_value("LAKEBASE_INSTANCE_NAME") - if existing: - print(f"Found existing Lakebase instance in .env: {existing}") - new_value = input( - "Press Enter to keep this value, or enter a new instance name: " - ).strip() - lakebase_name = new_value if new_value else existing - else: - # Interactive mode - prompt for instance name - lakebase_name = input("Please enter your Lakebase instance name: ").strip() - - if not lakebase_name: - print_error("Lakebase instance name is required for memory features") - sys.exit(1) - - # Validate that the Lakebase instance exists and user has access - instance_info = validate_lakebase_instance(profile_name, lakebase_name) - if not instance_info: - sys.exit(1) - - # Update .env with the Lakebase instance name - update_env_file("LAKEBASE_INSTANCE_NAME", lakebase_name) - print_success(f"Lakebase instance name '{lakebase_name}' saved to .env") - - # Set up PostgreSQL connection environment variables - pg_host = instance_info.get("read_write_dns", "") - if pg_host: - update_env_file("PGHOST", pg_host) - print_success(f"PGHOST set to '{pg_host}'") - else: - print_error("Could not get read_write_dns from Lakebase instance") - - update_env_file("PGUSER", username) - 
print_success(f"PGUSER set to '{username}'") - - update_env_file("PGDATABASE", "databricks_postgres") - print_success("PGDATABASE set to 'databricks_postgres'") - - return lakebase_name - - -def update_databricks_yml_experiment(experiment_id: str) -> None: - """Update databricks.yml to set the experiment ID in the app resource.""" - yml_path = Path("databricks.yml") - if not yml_path.exists(): - return - - content = yml_path.read_text() - - # Set the experiment_id in the app's experiment resource - content = re.sub( - r'(experiment_id: )"[^"]*"', - f'\\1"{experiment_id}"', - content, - ) - - yml_path.write_text(content) - print_success("Updated databricks.yml with experiment ID") - - -def update_databricks_yml_lakebase(lakebase_name: str) -> None: - """Update databricks.yml to replace lakebase placeholder with actual instance name.""" - yml_path = Path("databricks.yml") - if not yml_path.exists(): - return - - content = yml_path.read_text() - if "" not in content: - return - - content = content.replace("", lakebase_name) - yml_path.write_text(content) - print_success("Updated databricks.yml with Lakebase instance name") - - -def main(): - parser = argparse.ArgumentParser( - description="Quickstart setup for Databricks agent development", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - uv run quickstart # Interactive setup - uv run quickstart --profile DEFAULT # Use existing profile (non-interactive) - uv run quickstart --host https://... # Set up new profile with host - uv run quickstart --lakebase my-db # Include Lakebase setup for memory - """, - ) - parser.add_argument( - "--profile", - help="Use specified Databricks profile (non-interactive)", - metavar="NAME", - ) - parser.add_argument( - "--host", - help="Databricks workspace URL (for initial setup)", - metavar="URL", - ) - parser.add_argument( - "--lakebase", - help="Lakebase instance name (for memory features)", - metavar="NAME", - ) - - args = parser.parse_args() - - try: - print_header("Agent on Apps - Quickstart Setup") - - # Step 1: Check prerequisites - prereqs = check_prerequisites() - missing = check_missing_prerequisites(prereqs) - - if missing: - print_step("Missing prerequisites:") - for item in missing: - print(f" • {item}") - print("\nPlease install the missing prerequisites and run this script again.") - sys.exit(1) - - # Check Node.js version meets Vite requirements - node_error = check_node_version() - if node_error: - print_error(f"Node.js version check failed:\n {node_error}") - sys.exit(1) - - # Step 2: Set up .env - setup_env_file() - - # Step 3: Databricks authentication - profile_name = setup_databricks_auth(args.profile, args.host) - - # Step 4: Get username and create MLflow experiment - print_step("Getting Databricks username...") - username = get_databricks_username(profile_name) - print(f"Username: {username}") - - experiment_name, experiment_id = create_mlflow_experiment(profile_name, username) - - # Step 5: Update .env with experiment ID - update_env_file("MLFLOW_EXPERIMENT_ID", experiment_id) - print_success("Updated .env with experiment ID") - - # Step 5b: Update databricks.yml to use literal experiment ID - update_databricks_yml_experiment(experiment_id) - - # Step 6: Lakebase setup (if needed for memory features) - lakebase_name = None - lakebase_required = args.lakebase or check_lakebase_required() - if lakebase_required: - lakebase_name = setup_lakebase(profile_name, username, args.lakebase) - update_databricks_yml_lakebase(lakebase_name) - - # Final summary - host 
= get_databricks_host(profile_name) - - print_header("Setup Complete!") - summary = f""" -✓ Prerequisites verified (uv, Node.js, Databricks CLI) -✓ Databricks authenticated with profile: {profile_name} -✓ Configuration files created (.env) - -✓ MLflow experiment created for tracing and evaluation: {experiment_name} -✓ Experiment ID: {experiment_id}""" - - if host and experiment_id: - summary += f"\n {host}/ml/experiments/{experiment_id}" - - if lakebase_name: - summary += f"\n\n✓ Lakebase instance: {lakebase_name}" - summary += "\n✓ PostgreSQL variables set (PGHOST, PGUSER, PGDATABASE)" - if host: - summary += f"\n {host}/lakebase/provisioned/{lakebase_name}" - - summary += "\nNext step: Run 'uv run start-app' to start the agent locally\n" - print(summary) - - except KeyboardInterrupt: - print("\n\nSetup cancelled.") - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/agent-supervisor-api/scripts/start_app.py b/agent-supervisor-api/scripts/start_app.py deleted file mode 100644 index 58625601..00000000 --- a/agent-supervisor-api/scripts/start_app.py +++ /dev/null @@ -1,332 +0,0 @@ -#!/usr/bin/env python3 -""" -Start script for running frontend and backend processes concurrently. - -Requirements: -1. Not reporting ready until BOTH frontend and backend processes are ready -2. Exiting as soon as EITHER process fails -3. Printing error logs if either process fails - -Usage: - start-app [OPTIONS] - -All options are passed through to the backend server (start-server). -See 'uv run start-server --help' for available options. -""" - -import argparse -import os -import re -import shutil -import socket -import subprocess -import sys -import threading -import time -from pathlib import Path - -from dotenv import load_dotenv - -# Readiness patterns -BACKEND_READY = [r"Uvicorn running on", r"Application startup complete", r"Started server process"] -FRONTEND_READY = [r"Server is running on http://localhost"] - - -def check_port_available(port: int) -> bool: - """Check if a port is available by attempting to bind to it.""" - try: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("localhost", port)) - return True - except OSError: - return False - - -class ProcessManager: - def __init__(self, port=8000, no_ui=False): - self.backend_process = None - self.frontend_process = None - self.backend_ready = False - self.frontend_ready = False - self.failed = threading.Event() - self.backend_log = None - self.frontend_log = None - self.port = port - self.no_ui = no_ui - - def check_ports(self): - """Check that required ports are available before starting processes.""" - backend_port = self.port - - errors = [] - if not check_port_available(backend_port): - errors.append( - f"Port {backend_port} (backend) is already in use.\n" - f" To free it: lsof -ti :{backend_port} | xargs kill -9" - ) - - if not self.no_ui: - frontend_port = int(os.environ.get("CHAT_APP_PORT", os.environ.get("PORT", "3000"))) - - if backend_port == frontend_port: - print( - f"ERROR: Backend and frontend are both configured to use port {backend_port}." 
- ) - print(" Set CHAT_APP_PORT in .env to a different port (e.g., CHAT_APP_PORT=3000).") - sys.exit(1) - - if not check_port_available(frontend_port): - port_source = ( - "CHAT_APP_PORT" - if os.environ.get("CHAT_APP_PORT") - else "PORT" - if os.environ.get("PORT") - else "default" - ) - errors.append( - f"Port {frontend_port} (frontend, source: {port_source}) is already in use.\n" - f" To free it: lsof -ti :{frontend_port} | xargs kill -9\n" - f" Or set a different port: CHAT_APP_PORT= in .env" - ) - - if errors: - print("ERROR: Port(s) already in use:\n") - for error in errors: - print(f" {error}\n") - sys.exit(1) - - def monitor_process(self, process, name, log_file, patterns): - is_ready = False - try: - for line in iter(process.stdout.readline, ""): - if not line: - break - - line = line.rstrip() - log_file.write(line + "\n") - print(f"[{name}] {line}") - - # Check readiness - if not is_ready and any(re.search(p, line, re.IGNORECASE) for p in patterns): - is_ready = True - if name == "backend": - self.backend_ready = True - else: - self.frontend_ready = True - print(f"✓ {name.capitalize()} is ready!") - - if self.no_ui and self.backend_ready: - print("\n" + "=" * 50) - print("✓ Backend is ready! (running without UI)") - print(f"✓ API available at http://localhost:{self.port}") - print("=" * 50 + "\n") - elif self.backend_ready and self.frontend_ready: - print("\n" + "=" * 50) - print("✓ Both frontend and backend are ready!") - print(f"✓ Open the frontend at http://localhost:{self.port}") - print("=" * 50 + "\n") - - process.wait() - if process.returncode != 0: - self.failed.set() - - except Exception as e: - print(f"Error monitoring {name}: {e}") - self.failed.set() - - def clone_frontend_if_needed(self): - if Path("e2e-chatbot-app-next").exists(): - return True - - print("Cloning e2e-chatbot-app-next...") - for url in [ - "https://github.com/databricks/app-templates.git", - "git@github.com:databricks/app-templates.git", - ]: - try: - subprocess.run( - ["git", "clone", "--filter=blob:none", "--sparse", url, "temp-app-templates"], - check=True, - capture_output=True, - ) - break - except subprocess.CalledProcessError: - continue - else: - print("ERROR: Failed to clone repository.") - print( - "Manually download from: https://download-directory.github.io/?url=https://github.com/databricks/app-templates/tree/main/e2e-chatbot-app-next" - ) - return False - - subprocess.run( - ["git", "sparse-checkout", "set", "e2e-chatbot-app-next"], - cwd="temp-app-templates", - check=True, - ) - Path("temp-app-templates/e2e-chatbot-app-next").rename("e2e-chatbot-app-next") - shutil.rmtree("temp-app-templates", ignore_errors=True) - return True - - def start_process(self, cmd, name, log_file, patterns, cwd=None): - print(f"Starting {name}...") - process = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, cwd=cwd - ) - - thread = threading.Thread( - target=self.monitor_process, args=(process, name, log_file, patterns), daemon=True - ) - thread.start() - return process - - def print_logs(self, log_path): - print(f"\nLast 50 lines of {log_path}:") - print("-" * 40) - try: - lines = Path(log_path).read_text().splitlines() - print("\n".join(lines[-50:])) - except FileNotFoundError: - print(f"(no {log_path} found)") - print("-" * 40) - - def cleanup(self): - print("\n" + "=" * 42) - print("Shutting down..." 
if self.no_ui else "Shutting down both processes...") - print("=" * 42) - - for proc in [self.backend_process, self.frontend_process]: - if proc: - try: - proc.terminate() - proc.wait(timeout=5) - except (subprocess.TimeoutExpired, Exception): - proc.kill() - - if self.backend_log: - self.backend_log.close() - if self.frontend_log: - self.frontend_log.close() - - def run(self, backend_args=None): - load_dotenv(dotenv_path=".env", override=True) - if not os.environ.get("DATABRICKS_APP_NAME"): - self.check_ports() - - if not self.no_ui: - if not self.clone_frontend_if_needed(): - print("WARNING: Failed to clone frontend. Continuing with backend only.") - self.no_ui = True - else: - # Set API_PROXY environment variable for frontend to connect to backend - os.environ["API_PROXY"] = f"http://localhost:{self.port}/invocations" - - # Open log files - self.backend_log = open("backend.log", "w", buffering=1) - if not self.no_ui: - self.frontend_log = open("frontend.log", "w", buffering=1) - - try: - # Build backend command, passing through all arguments - backend_cmd = ["uv", "run", "start-server"] - if backend_args: - backend_cmd.extend(backend_args) - - # Start backend - self.backend_process = self.start_process( - backend_cmd, "backend", self.backend_log, BACKEND_READY - ) - - if not self.no_ui: - # Setup and start frontend - frontend_dir = Path("e2e-chatbot-app-next") - for cmd, desc in [("npm install", "install"), ("npm run build", "build")]: - print(f"Running npm {desc}...") - result = subprocess.run( - cmd.split(), cwd=frontend_dir, capture_output=True, text=True - ) - if result.returncode != 0: - print(f"npm {desc} failed: {result.stderr}") - return 1 - - self.frontend_process = self.start_process( - ["npm", "run", "start"], - "frontend", - self.frontend_log, - FRONTEND_READY, - cwd=frontend_dir, - ) - - print( - f"\nMonitoring processes (Backend PID: {self.backend_process.pid}, Frontend PID: {self.frontend_process.pid})\n" - ) - else: - print(f"\nMonitoring backend process (PID: {self.backend_process.pid})\n") - - # Wait for failure - while not self.failed.is_set(): - time.sleep(0.1) - if self.backend_process.poll() is not None: - self.failed.set() - break - if ( - not self.no_ui - and self.frontend_process - and self.frontend_process.poll() is not None - ): - self.failed.set() - break - - # Determine which failed - if self.no_ui or self.backend_process.poll() is not None: - failed_name = "backend" - failed_proc = self.backend_process - else: - failed_name = "frontend" - failed_proc = self.frontend_process - exit_code = failed_proc.returncode if failed_proc else 1 - - print( - f"\n{'=' * 42}\nERROR: {failed_name} process exited with code {exit_code}\n{'=' * 42}" - ) - self.print_logs("backend.log") - if not self.no_ui: - self.print_logs("frontend.log") - return exit_code - - except KeyboardInterrupt: - print("\nInterrupted") - return 0 - - finally: - self.cleanup() - - -def main(): - parser = argparse.ArgumentParser( - description="Start agent frontend and backend", - usage="%(prog)s [OPTIONS]\n\nAll options are passed through to start-server. 
" - "Use 'uv run start-server --help' for available options.", - ) - parser.add_argument( - "--no-ui", - action="store_true", - help="Run backend only, skip frontend UI", - ) - args, backend_args = parser.parse_known_args() - - # Extract port from backend_args if specified - port = 8000 - for i, arg in enumerate(backend_args): - if arg == "--port" and i + 1 < len(backend_args): - try: - port = int(backend_args[i + 1]) - except ValueError: - pass - break - - sys.exit(ProcessManager(port=port, no_ui=args.no_ui).run(backend_args)) - - -if __name__ == "__main__": - main() diff --git a/agent-supervisor-api/tests/__init__.py b/agent-supervisor-api/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agent-supervisor-api/tests/test_agent.py b/agent-supervisor-api/tests/test_agent.py deleted file mode 100644 index eeafa30e..00000000 --- a/agent-supervisor-api/tests/test_agent.py +++ /dev/null @@ -1,244 +0,0 @@ -"""Tests for the agent-supervisor-api template. - -Unit tests run without credentials. Integration tests require setting -DATABRICKS_HOST and DATABRICKS_TOKEN (or a configured CLI profile). - -Integration test target: - Host: https://eng-ml-inference.staging.cloud.databricks.com - AI Gateway: https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1/responses - LiteSwap header: x-databricks-traffic-id: testenv://liteswap/mas-arv - Model: databricks-claude-opus-4-6 -""" -import os -from unittest.mock import MagicMock, patch - -import pytest -from mlflow.types.responses import ResponsesAgentRequest, ResponsesAgentResponse - - -# --- Unit tests (no credentials needed) --- - - -def test_module_imports(): - """Agent module imports cleanly without auth.""" - import agent_server.agent as agent - assert hasattr(agent, "MODEL") - assert hasattr(agent, "TOOLS") - assert hasattr(agent, "invoke_handler") - assert hasattr(agent, "stream_handler") - assert hasattr(agent, "_get_client") - - -def test_ai_gateway_base_url(): - """_ai_gateway_base_url derives the AI Gateway URL from workspace host + workspace ID.""" - from agent_server.agent import _ai_gateway_base_url - - mock_wc = MagicMock() - mock_wc.config.host = "https://my-workspace.cloud.databricks.com" - mock_wc.get_workspace_id.return_value = 1234567890 - - url = _ai_gateway_base_url(mock_wc) - assert url == "https://1234567890.ai-gateway.cloud.databricks.com/mlflow/v1" - - -def test_ai_gateway_base_url_staging(): - """Works for staging workspaces too.""" - from agent_server.agent import _ai_gateway_base_url - - mock_wc = MagicMock() - mock_wc.config.host = "https://eng-ml-inference.staging.cloud.databricks.com" - mock_wc.get_workspace_id.return_value = 1653573648247579 - - url = _ai_gateway_base_url(mock_wc) - assert url == "https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1" - - -def test_get_client_uses_ai_gateway_url(): - """_get_client configures DatabricksOpenAI with the AI Gateway base URL.""" - from agent_server.agent import _get_client - - mock_wc = MagicMock() - mock_wc.config.host = "https://my-workspace.cloud.databricks.com" - mock_wc.get_workspace_id.return_value = 1234567890 - - captured = {} - - def fake_databricks_openai(**kwargs): - captured.update(kwargs) - return MagicMock() - - with patch("agent_server.agent.WorkspaceClient", return_value=mock_wc), \ - patch("agent_server.agent.DatabricksOpenAI", side_effect=fake_databricks_openai): - _get_client() - - assert captured["base_url"] == "https://1234567890.ai-gateway.cloud.databricks.com/mlflow/v1" - assert 
captured["workspace_client"] is mock_wc - - -def test_tools_structure(): - """TOOLS list has correct structure.""" - from agent_server.agent import TOOLS - assert len(TOOLS) >= 1 - tool = TOOLS[0] - assert "type" in tool - assert tool["type"] in ("uc_function", "genie", "agent_endpoint", "mcp") - - -def test_model_is_string(): - from agent_server.agent import MODEL - assert isinstance(MODEL, str) - assert len(MODEL) > 0 - - -def test_get_session_id_from_conversation_id(): - from agent_server.utils import get_session_id - req = MagicMock(spec=ResponsesAgentRequest) - req.context = MagicMock() - req.context.conversation_id = "conv-123" - assert get_session_id(req) == "conv-123" - - -def test_get_session_id_from_custom_inputs(): - from agent_server.utils import get_session_id - req = MagicMock(spec=ResponsesAgentRequest) - req.context = None - req.custom_inputs = {"session_id": "sess-456"} - assert get_session_id(req) == "sess-456" - - -def test_get_session_id_returns_none(): - from agent_server.utils import get_session_id - req = MagicMock(spec=ResponsesAgentRequest) - req.context = None - req.custom_inputs = None - assert get_session_id(req) is None - - -def test_invoke_handler_calls_responses_create(): - """invoke_handler calls client.responses.create with correct params.""" - from agent_server.agent import MODEL, TOOLS - - mock_item = MagicMock() - mock_item.model_dump.return_value = {"type": "message", "id": "msg_001", "role": "assistant", "content": [{"type": "output_text", "text": "hi"}]} - mock_response = MagicMock() - mock_response.output = [mock_item] - - mock_client = MagicMock() - mock_client.responses.create.return_value = mock_response - - with patch("agent_server.agent._get_client", return_value=mock_client): - req = MagicMock(spec=ResponsesAgentRequest) - req.context = None - req.custom_inputs = None - req.input = [MagicMock()] - req.input[0].model_dump.return_value = {"type": "message", "role": "user", "content": "hi"} - - from agent_server.agent import invoke_handler - result = invoke_handler(req) - - from agent_server.agent import _EXTRA_HEADERS - mock_client.responses.create.assert_called_once_with( - model=MODEL, - input=[{"type": "message", "role": "user", "content": "hi"}], - tools=TOOLS, - stream=False, - extra_headers=_EXTRA_HEADERS, - ) - assert isinstance(result, ResponsesAgentResponse) - assert len(result.output) == 1 - assert result.output[0].id == "msg_001" - - -def test_stream_handler_calls_responses_create_streaming(): - """stream_handler calls client.responses.create with stream=True.""" - from agent_server.agent import MODEL, TOOLS - - mock_client = MagicMock() - mock_client.responses.create.return_value = iter([]) - - with patch("agent_server.agent._get_client", return_value=mock_client): - req = MagicMock(spec=ResponsesAgentRequest) - req.context = None - req.custom_inputs = None - req.input = [MagicMock()] - req.input[0].model_dump.return_value = {"type": "message", "role": "user", "content": "hi"} - - from agent_server.agent import stream_handler - stream_handler(req) # Returns the iterator from client - - from agent_server.agent import _EXTRA_HEADERS - mock_client.responses.create.assert_called_once_with( - model=MODEL, - input=[{"type": "message", "role": "user", "content": "hi"}], - tools=TOOLS, - stream=True, - extra_headers=_EXTRA_HEADERS, - ) - - -# --- Integration tests (require credentials for eng-ml-inference staging) --- - - -INTEGRATION_REASON = ( - "Integration test requires DATABRICKS_TOKEN and DATABRICKS_HOST for " - "eng-ml-inference 
staging (workspace 1653573648247579). " - "Set ENG_ML_INFERENCE_TOKEN to enable." -) - - -@pytest.mark.skipif( - not os.environ.get("ENG_ML_INFERENCE_TOKEN"), - reason=INTEGRATION_REASON, -) -def test_supervisor_api_basic_call(): - """End-to-end: call the Supervisor API with a simple prompt (no tools).""" - import openai - - token = os.environ["ENG_ML_INFERENCE_TOKEN"] - client = openai.OpenAI( - base_url="https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1", - api_key=token, - default_headers={"x-databricks-traffic-id": "testenv://liteswap/mas-arv"}, - ) - response = client.responses.create( - model="databricks-claude-opus-4-6", - input=[{"type": "message", "role": "user", "content": "Reply with just the word 'OK'."}], - ) - assert response.output_text.strip() != "" - - -@pytest.mark.skipif( - not os.environ.get("ENG_ML_INFERENCE_TOKEN"), - reason=INTEGRATION_REASON, -) -def test_supervisor_api_with_genie_tool(): - """End-to-end: call Supervisor API with Genie tool (the NYC taxi example).""" - import openai - - token = os.environ["ENG_ML_INFERENCE_TOKEN"] - client = openai.OpenAI( - base_url="https://1653573648247579.ai-gateway.staging.cloud.databricks.com/mlflow/v1", - api_key=token, - default_headers={"x-databricks-traffic-id": "testenv://liteswap/mas-arv"}, - ) - response = client.responses.create( - model="databricks-claude-opus-4-6", - input=[ - { - "type": "message", - "role": "user", - "content": "What zipcodes do the taxis operate in?", - } - ], - tools=[ - { - "type": "genie", - "genie": { - "name": "nyc-taxi-space", - "description": "Information about NYC Taxi spaces", - "space_id": "01f07892cf3118edad0a4054bcd25122", - }, - } - ], - ) - assert response.output_text.strip() != "" From 662b0db7e69f537ba971257877512b18084782cd Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Thu, 19 Mar 2026 23:13:12 -0700 Subject: [PATCH 4/6] feat: update use-supervisor-api skill with use_ai_gateway and OBO pattern - Replace manual AI Gateway URL derivation with use_ai_gateway=True - Add OBO pattern: cache base URL at startup, create per-request client with forwarded user token via x-forwarded-access-token custom_input - Bump minimum databricks-openai version to 0.14.0 - Drop beta/workspace availability caveats; keep real API constraints Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- .../skills/use-supervisor-api/SKILL.md | 51 +++++++++++-------- 9 files changed, 279 insertions(+), 180 deletions(-) diff --git a/.claude/skills/use-supervisor-api/SKILL.md b/.claude/skills/use-supervisor-api/SKILL.md index 5f64580f..5d281014 100644 --- a/.claude/skills/use-supervisor-api/SKILL.md +++ b/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. 
- -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run {{BUNDLE_NAME}} # Deploy ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md index 6c5f5686..1a605e73 100644 --- a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_langgraph_long_term_memo ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md index 60297de7..af72c082 100644 --- a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_langgraph_short_term_mem ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md index 375e3da3..19dac46e 100644 --- a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_langgraph # Deploy ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md index 911cd96f..be98c2af 100644 --- a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_migration # Deploy ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md index 7d5be0ad..23813dfa 100644 --- a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_non_conversational # De ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md index f18ed94f..261a4526 100644 --- a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. 
+The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_long_r ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md index 5ba51cb7..fdf316e0 100644 --- a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_multia ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). diff --git a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md index 6927175e..207d3ac3 100644 --- a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md @@ -5,16 +5,13 @@ description: "Replace the client-side agent loop with Databricks Supervisor API # Use the Databricks Supervisor API -> **Beta Feature:** The Supervisor API requires **AI Gateway (Beta) preview** to be enabled in your workspace. Contact your Databricks account team if it's not available. - -The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent calling tools and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. +The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. 
Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest. ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop (tool calls, retries, synthesis) for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. -**Limitations (Beta):** -- Usage tracking is not supported +**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request - Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed @@ -26,7 +23,7 @@ Add to `pyproject.toml` if not already present: [project] dependencies = [ ... - "databricks-openai>=0.9.0", + "databricks-openai>=0.14.0", "databricks-sdk>=0.55.0", ] ``` @@ -82,11 +79,14 @@ TOOLS = [ Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely. -```python -import re +`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace. +When deployed on Databricks Apps, the platform forwards the authenticated user's token via `x-forwarded-access-token`. Pass this to the Supervisor API so tool calls (e.g., Genie queries) run on behalf of the user rather than the app's service principal. + +```python import mlflow from databricks.sdk import WorkspaceClient +from databricks.sdk.config import Config from databricks_openai import DatabricksOpenAI from mlflow.genai.agent_server import invoke, stream from mlflow.types.responses import ( @@ -99,15 +99,24 @@ mlflow.openai.autolog() MODEL = "databricks-claude-sonnet-4-5" TOOLS = [...] # From Step 2 +# Resolve and cache the AI Gateway URL once at module load +_wc = WorkspaceClient() +_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True) +_ai_gateway_base_url = str(_client.base_url) -def _get_client() -> DatabricksOpenAI: - """Create a DatabricksOpenAI client pointed at the AI Gateway.""" - wc = WorkspaceClient() - host = wc.config.host # e.g. 
https://my-workspace.cloud.databricks.com - workspace_id = wc.get_workspace_id() - domain = re.match(r"https://[^.]+\.(.+)", host).group(1) - base_url = f"https://{workspace_id}.ai-gateway.{domain}/mlflow/v1" - return DatabricksOpenAI(workspace_client=wc, base_url=base_url) + +def _get_client(obo_token: str | None = None) -> DatabricksOpenAI: + """Return a client using the OBO token if provided, else service principal.""" + if obo_token: + obo_wc = WorkspaceClient( + config=Config(host=_wc.config.host, token=obo_token) + ) + return DatabricksOpenAI(workspace_client=obo_wc, base_url=_ai_gateway_base_url) + return _client + + +def _obo_token(request: ResponsesAgentRequest) -> str | None: + return (request.custom_inputs or {}).get("x-forwarded-access-token") @invoke() @@ -115,7 +124,7 @@ def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse: mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - response = _get_client().responses.create( + response = _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -129,7 +138,7 @@ def stream_handler(request: ResponsesAgentRequest): mlflow.update_current_trace( metadata={"mlflow.trace.session": request.context.conversation_id} ) - return _get_client().responses.create( + return _get_client(_obo_token(request)).responses.create( model=MODEL, input=[i.model_dump() for i in request.input], tools=TOOLS, @@ -137,6 +146,8 @@ def stream_handler(request: ResponsesAgentRequest): ) ``` +> **OBO note:** The `x-forwarded-access-token` is injected into `custom_inputs` by the app server middleware. No changes are needed to the client — the token arrives automatically when users call your deployed app. + ## Step 4: Grant Permissions in `databricks.yml` For each hosted tool, grant the corresponding resource access. See the **add-tools** skill for complete YAML examples. @@ -166,7 +177,7 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk # Dep ## Troubleshooting -**"AI Gateway not available"** — Enable AI Gateway (Beta) preview in workspace settings, or contact your Databricks account team. +**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. **"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). From c5599e56d28144fc52b7e5cbd1082a05a3d47168 Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Fri, 20 Mar 2026 22:59:47 -0700 Subject: [PATCH 5/6] fix: clarify agent_endpoint only supports KA endpoints in use-supervisor-api skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent_endpoint tool type routes through KATool which always sends ka_query as the input parameter — a KA-specific protocol. Regular LangGraph/OpenAI agents don't implement this protocol and will fail. 
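For illustration, the protocol mismatch looks roughly like this (the payload shapes below are assumptions based on the behavior described above, not exact wire formats):

```python
# What KATool sends to the tool's serving endpoint — the KA-specific protocol:
ka_tool_request = {"ka_query": "Summarize churn drivers for Q3"}

# What a typical LangGraph / OpenAI Agents SDK serving endpoint expects instead
# (Responses-style input), so the ka_query payload is not understood:
generic_agent_request = {
    "input": [{"role": "user", "content": "Summarize churn drivers for Q3"}]
}
```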
Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../.claude/skills/use-supervisor-api/SKILL.md | 14 ++++++++------ .../e2e-chatbot-app-next/package-lock.json | 16 ++++++++++++++++ 10 files changed, 88 insertions(+), 54 deletions(-) create mode 100644 agent-openai-agents-sdk/e2e-chatbot-app-next/package-lock.json diff --git a/.claude/skills/use-supervisor-api/SKILL.md b/.claude/skills/use-supervisor-api/SKILL.md index 5d281014..be02ce10 100644 --- a/.claude/skills/use-supervisor-api/SKILL.md +++ b/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md index 1a605e73..23154452 100644 --- a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. 
+Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md index af72c082..af359b76 100644 --- a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. 
See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md index 19dac46e..e314a2fd 100644 --- a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md index be98c2af..cbfc6d3a 100644 --- a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. 
**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md index 23813dfa..5a63e3da 100644 --- a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. 
See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md index 261a4526..895080ad 100644 --- a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md index fdf316e0..a8ea0fe2 100644 --- a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. 
**Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md index 207d3ac3..29e92748 100644 --- a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md @@ -9,7 +9,7 @@ The Supervisor API lets Databricks run the tool-selection and synthesis loop ser ## When to Use -Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, agent endpoints, or MCP servers via UC connections. +Use the Supervisor API when you want Databricks to manage the full agent loop for hosted tools: Genie spaces, UC functions, KA (Knowledge Assistant) agent endpoints, or MCP servers via UC connections. **Limitations:** - Cannot mix hosted tools with client-side function tools in the same request @@ -54,13 +54,15 @@ TOOLS = [ "description": "Executes a custom UC function", }, }, - # Agent endpoint — delegates to another agent + # KA (Knowledge Assistant) endpoint — delegates to a Knowledge Assistant agent + # Note: agent_endpoint only supports KA endpoints, not arbitrary agent serving endpoints. + # KA endpoints use a specific ka_query protocol; regular LangGraph/OpenAI agents do not. { "type": "agent_endpoint", "agent_endpoint": { - "name": "my-sub-agent", - "description": "A specialized sub-agent", - "endpoint_name": "", + "name": "my-ka-agent", + "description": "A Knowledge Assistant agent", + "endpoint_name": "", }, }, # MCP server via UC connection @@ -156,7 +158,7 @@ For each hosted tool, grant the corresponding resource access. 
See the **add-too |-----------|-------------------| | `genie` | `genie_space` with `CAN_RUN` | | `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | -| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` | +| `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | | `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: diff --git a/agent-openai-agents-sdk/e2e-chatbot-app-next/package-lock.json b/agent-openai-agents-sdk/e2e-chatbot-app-next/package-lock.json new file mode 100644 index 00000000..5c93f53a --- /dev/null +++ b/agent-openai-agents-sdk/e2e-chatbot-app-next/package-lock.json @@ -0,0 +1,16 @@ +{ + "name": "e2e-chatbot-app-next", + "lockfileVersion": 3, + "requires": true, + "packages": { + "client": { + "extraneous": true + }, + "packages/core": { + "extraneous": true + }, + "server": { + "extraneous": true + } + } +} From de0de23590e58f3ca193e850aa810d420e569d8a Mon Sep 17 00:00:00 2001 From: Sid Murching Date: Mon, 23 Mar 2026 19:08:01 -0700 Subject: [PATCH 6/6] Update Supervisor API skill tool field names - genie -> genie_space (drop name field) - uc_function -> unity_catalog_function (drop name_alias) - mcp -> external_mcp_server (drop name field) - Update permissions table and troubleshooting section Co-Authored-By: Claude Sonnet 4.6 --- .claude/skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- .../skills/use-supervisor-api/SKILL.md | 25 ++++++++----------- 9 files changed, 99 insertions(+), 126 deletions(-) diff --git a/.claude/skills/use-supervisor-api/SKILL.md b/.claude/skills/use-supervisor-api/SKILL.md index be02ce10..e5882021 100644 --- a/.claude/skills/use-supervisor-api/SKILL.md +++ b/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. 
See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run {{BUNDLE_NAME}} # Deploy **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md index 23154452..db9d8776 100644 --- a/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-long-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_langgraph_long_term_memo **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. 
Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md index af359b76..4866fd44 100644 --- a/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph-short-term-memory/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_langgraph_short_term_mem **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. 
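Putting the two tool-related fixes above together, a request the Supervisor API accepts looks roughly like this — a hedged sketch, assuming `client` is an OpenAI-compatible client already configured for the workspace's Responses API, with placeholder IDs and endpoint names:

```python
# Sketch only: every tool is a hosted type and no sampling parameters are set.
TOOLS = [
    {
        "type": "genie_space",
        "genie_space": {
            "description": "Query sales data using natural language",
            "space_id": "<genie-space-id>",
        },
    },
    {
        "type": "unity_catalog_function",
        "unity_catalog_function": {
            "name": "main.default.lookup_order",  # placeholder UC function
            "description": "Looks up an order by ID",
        },
    },
]

response = client.responses.create(
    model="<model-serving-endpoint>",  # placeholder endpoint name
    input="Which region had the highest sales last quarter?",
    tools=TOOLS,
    # Do not pass temperature, top_p, or other inference parameters here —
    # they are rejected when tools are provided.
)
print(response.output_text)
```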
diff --git a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md index e314a2fd..a9ac7d45 100644 --- a/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-langgraph/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_langgraph # Deploy **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md index cbfc6d3a..adfb9042 100644 --- a/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-migration-from-model-serving/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. 
Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_migration # Deploy **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md index 5a63e3da..261b6b9f 100644 --- a/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-non-conversational/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. 
Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_non_conversational # De **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md index 895080ad..b0a23738 100644 --- a/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-long-running-agent/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. 
Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_long_r **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md index a8ea0fe2..96bc94af 100644 --- a/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk-multiagent/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. 
Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk_multia **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call. diff --git a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md index 29e92748..f36f72ce 100644 --- a/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md +++ b/agent-openai-agents-sdk/.claude/skills/use-supervisor-api/SKILL.md @@ -38,19 +38,17 @@ Define your tools as a list of dicts. 
Run `uv run discover-tools` to find availa TOOLS = [ # Genie space — natural language queries over structured data { - "type": "genie", - "genie": { - "name": "my-genie-space", + "type": "genie_space", + "genie_space": { "description": "Query sales data using natural language", "space_id": "", }, }, # UC function — SQL or Python UDF { - "type": "uc_function", - "uc_function": { + "type": "unity_catalog_function", + "unity_catalog_function": { "name": "..", - "name_alias": "my_function", "description": "Executes a custom UC function", }, }, @@ -65,11 +63,10 @@ TOOLS = [ "endpoint_name": "", }, }, - # MCP server via UC connection + # External MCP server via UC connection { - "type": "mcp", - "mcp": { - "name": "my-mcp-server", + "type": "external_mcp_server", + "external_mcp_server": { "description": "An external MCP server", "connection_name": "", }, @@ -156,10 +153,10 @@ For each hosted tool, grant the corresponding resource access. See the **add-too | Tool type | Resource to grant | |-----------|-------------------| -| `genie` | `genie_space` with `CAN_RUN` | -| `uc_function` | `uc_securable` (FUNCTION) with `EXECUTE` | +| `genie_space` | `genie_space` with `CAN_RUN` | +| `unity_catalog_function` | `uc_securable` (FUNCTION) with `EXECUTE` | | `agent_endpoint` | `serving_endpoint` with `CAN_QUERY` (KA endpoints only) | -| `mcp` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | +| `external_mcp_server` | `uc_securable` (CONNECTION) with `USE_CONNECTION` | Also grant `CAN_QUERY` on the `MODEL` serving endpoint: @@ -181,6 +178,6 @@ databricks bundle deploy && databricks bundle run agent_openai_agents_sdk # Dep **"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team. -**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie`, `uc_function`, `agent_endpoint`, `mcp`). +**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `unity_catalog_function`, `agent_endpoint`, `external_mcp_server`). **"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call.
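For anyone updating an existing agent against this series, the field renames above reduce to a small before/after transformation of each tool entry; the sketch below uses placeholder IDs:

```python
# Before (field names prior to this patch):
old_tool = {
    "type": "genie",
    "genie": {
        "name": "my-genie-space",  # dropped after the rename
        "description": "Query sales data using natural language",
        "space_id": "<genie-space-id>",
    },
}

# After (renamed type and payload key; the `name` field is removed):
new_tool = {
    "type": "genie_space",
    "genie_space": {
        "description": "Query sales data using natural language",
        "space_id": "<genie-space-id>",
    },
}

# The same pattern applies to the other renames:
#   uc_function -> unity_catalog_function  (also drop `name_alias`)
#   mcp         -> external_mcp_server     (also drop `name`)
# `agent_endpoint` keeps its shape but only supports KA endpoints.
```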