diff --git a/manifest.json b/manifest.json
index 54ec72f..1d6bebb 100644
--- a/manifest.json
+++ b/manifest.json
@@ -1,12 +1,72 @@
 {
   "version": "2",
-  "updated_at": "2026-04-30T11:02:41Z",
+  "updated_at": "2026-05-12T20:26:42Z",
   "skills": {
+    "add-tools-langgraph": {
+      "version": "0.0.1",
+      "description": "Add tools and permissions to a LangGraph agent (MCP, Genie, vector search, UC functions)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "add-tools-openai": {
+      "version": "0.0.1",
+      "description": "Add tools and permissions to an OpenAI Agents SDK agent (MCP, Genie, vector search, UC functions)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "agent-langgraph-memory": {
+      "version": "0.0.1",
+      "description": "Add memory capabilities (checkpointing, long-term store) to a LangGraph agent",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "agent-openai-memory": {
+      "version": "0.0.1",
+      "description": "Add memory capabilities (sessions) to an OpenAI Agents SDK agent",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "create-tools": {
+      "version": "0.0.1",
+      "description": "Create Databricks resources (Genie spaces, vector search indexes, UC functions, MCP servers) for use as agent tools",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
     "databricks-apps": {
       "version": "0.1.1",
       "description": "Databricks Apps development and deployment (evaluates analytics vs synced tables data access)",
       "experimental": false,
-      "updated_at": "2026-04-30T11:00:26Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -33,7 +93,7 @@
       "version": "0.1.0",
       "description": "Core Databricks skill for CLI, auth, and data exploration",
       "experimental": false,
-      "updated_at": "2026-04-23T13:47:44Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -48,7 +108,7 @@
       "version": "0.0.0",
       "description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources",
       "experimental": false,
-      "updated_at": "2026-04-23T13:47:44Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -66,7 +126,7 @@
       "version": "0.1.0",
       "description": "Databricks Jobs orchestration and scheduling",
       "experimental": false,
-      "updated_at": "2026-04-23T13:47:44Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -78,7 +138,7 @@
       "version": "0.1.0",
       "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API",
       "experimental": false,
-      "updated_at": "2026-04-30T11:02:37Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -93,7 +153,7 @@
       "version": "0.1.0",
       "description": "Databricks Model Serving endpoint management",
       "experimental": false,
-      "updated_at": "2026-04-23T13:47:44Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -105,7 +165,7 @@
       "version": "0.1.0",
       "description": "Databricks Pipelines (DLT) for ETL and streaming",
       "experimental": false,
-      "updated_at": "2026-04-23T13:47:44Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -152,7 +212,7 @@
       "version": "0.1.0",
       "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes",
       "experimental": false,
-      "updated_at": "2026-04-24T15:10:23Z",
+      "updated_at": "2026-05-12T20:25:04Z",
       "files": [
         "SKILL.md",
         "agents/openai.yaml",
@@ -164,6 +224,150 @@
         "references/networking-and-security.md",
         "references/streaming-migration.md"
       ]
+    },
+    "deploy": {
+      "version": "0.0.1",
+      "description": "Deploy an agent to Databricks Apps via Databricks Asset Bundles (DAB)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "discover-tools": {
+      "version": "0.0.1",
+      "description": "Discover available tools and resources (MCP servers, Genie spaces, UC functions, vector search) in a Databricks workspace",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "lakebase-setup": {
+      "version": "0.0.1",
+      "description": "Configure Lakebase as storage for agent memory (checkpoints, sessions, long-term store)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "load-testing": {
+      "version": "0.0.1",
+      "description": "Load test a Databricks App to find its maximum QPS",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "long-running-server": {
+      "version": "0.0.1",
+      "description": "Enable long-running background task support in an agent server (LongRunningAgentServer)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "migrate-from-model-serving": {
+      "version": "0.0.1",
+      "description": "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "modify-langgraph-agent": {
+      "version": "0.0.1",
+      "description": "Modify agent code, add tools, or change configuration in a LangGraph template",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "modify-openai-agent": {
+      "version": "0.0.1",
+      "description": "Modify agent code, add tools, or change configuration in an OpenAI Agents SDK template",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "quickstart": {
+      "version": "0.0.1",
+      "description": "Set up a Databricks agent development environment (authentication, .env, MLflow experiment)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "run-locally": {
+      "version": "0.0.1",
+      "description": "Run and test an agent locally with curl examples and hot-reload",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "supervisor-api": {
+      "version": "0.0.1",
+      "description": "Use the Databricks Supervisor API to run the agent loop server-side with hosted tools",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
+    },
+    "supervisor-api-background-mode": {
+      "version": "0.0.1",
+      "description": "Run long-lived agent tasks via Supervisor API background mode (polling pattern)",
+      "experimental": true,
+      "updated_at": "2026-05-12T20:26:11Z",
+      "files": [
+        "SKILL.md",
+        "agents/openai.yaml",
+        "assets/databricks.png",
+        "assets/databricks.svg"
+      ]
     }
   }
 }
diff --git a/scripts/skills.py b/scripts/skills.py
index cdfdcf7..5bcd00f 100644
--- a/scripts/skills.py
+++ b/scripts/skills.py
@@ -48,6 +48,75 @@
         "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes",
         "experimental": False,
     },
+    # Skills imported from databricks/app-templates (.claude/skills/). ML-63273.
+    "add-tools-langgraph": {
+        "description": "Add tools and permissions to a LangGraph agent (MCP, Genie, vector search, UC functions)",
+        "experimental": True,
+    },
+    "add-tools-openai": {
+        "description": "Add tools and permissions to an OpenAI Agents SDK agent (MCP, Genie, vector search, UC functions)",
+        "experimental": True,
+    },
+    "agent-langgraph-memory": {
+        "description": "Add memory capabilities (checkpointing, long-term store) to a LangGraph agent",
+        "experimental": True,
+    },
+    "agent-openai-memory": {
+        "description": "Add memory capabilities (sessions) to an OpenAI Agents SDK agent",
+        "experimental": True,
+    },
+    "create-tools": {
+        "description": "Create Databricks resources (Genie spaces, vector search indexes, UC functions, MCP servers) for use as agent tools",
+        "experimental": True,
+    },
+    "deploy": {
+        "description": "Deploy an agent to Databricks Apps via Databricks Asset Bundles (DAB)",
+        "experimental": True,
+    },
+    "discover-tools": {
+        "description": "Discover available tools and resources (MCP servers, Genie spaces, UC functions, vector search) in a Databricks workspace",
+        "experimental": True,
+    },
+    "lakebase-setup": {
+        "description": "Configure Lakebase as storage for agent memory (checkpoints, sessions, long-term store)",
+        "experimental": True,
+    },
+    "load-testing": {
+        "description": "Load test a Databricks App to find its maximum QPS",
+        "experimental": True,
+    },
+    "long-running-server": {
+        "description": "Enable long-running background task support in an agent server (LongRunningAgentServer)",
+        "experimental": True,
+    },
+    "migrate-from-model-serving": {
+        "description": "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps",
+        "experimental": True,
+    },
+    "modify-langgraph-agent": {
+        "description": "Modify agent code, add tools, or change configuration in a LangGraph template",
+        "experimental": True,
+    },
+    "modify-openai-agent": {
+        "description": "Modify agent code, add tools, or change configuration in an OpenAI Agents SDK template",
+        "experimental": True,
+    },
+    "quickstart": {
+        "description": "Set up a Databricks agent development environment (authentication, .env, MLflow experiment)",
+        "experimental": True,
+    },
+    "run-locally": {
+        "description": "Run and test an agent locally with curl examples and hot-reload",
+        "experimental": True,
+    },
+    "supervisor-api": {
+        "description": "Use the Databricks Supervisor API to run the agent loop server-side with hosted tools",
+        "experimental": True,
+    },
+    "supervisor-api-background-mode": {
+        "description": "Run long-lived agent tasks via Supervisor API background mode (polling pattern)",
+        "experimental": True,
+    },
 }
 
 
diff --git a/skills/add-tools-langgraph/SKILL.md b/skills/add-tools-langgraph/SKILL.md
new file mode 100644
index 0000000..f4fcd77
--- /dev/null
+++ b/skills/add-tools-langgraph/SKILL.md
@@ -0,0 +1,126 @@
+---
+name: add-tools-langgraph
+description: "Add tools to your LangGraph agent and grant required permissions in databricks.yml. Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources."
+metadata:
+  version: "0.0.1"
+---
+
+# Add Tools & Grant Permissions
+
+> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks <command> --profile <profile>`
+
+> Don't have the resource yet? See **create-tools** skill first.
+
+**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.**
+
+Without this, you'll get permission errors when the agent tries to use the resource.
+
+## Workflow
+
+**Step 1:** Add MCP server in `agent_server/agent.py`:
+```python
+from databricks_langchain import DatabricksMCPServer, DatabricksMultiServerMCPClient
+
+genie_server = DatabricksMCPServer(
+    url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef",
+    name="my genie space",
+)
+
+mcp_client = DatabricksMultiServerMCPClient([genie_server])
+tools = await mcp_client.get_tools()
+```
+
+**Step 2:** Grant access in `databricks.yml`:
+```yaml
+resources:
+  apps:
+    agent_langgraph:
+      resources:
+        - name: 'my_genie_space'
+          genie_space:
+            name: 'My Genie Space'
+            space_id: '01234567-89ab-cdef'
+            permission: 'CAN_RUN'
+```
+
+**Step 3:** Deploy and run:
+```bash
+databricks bundle deploy
+databricks bundle run agent_langgraph  # Required to start app with new code!
+```
+
+See **deploy** skill for more details.
+
+## Resource Type Examples
+
+See the `examples/` directory for complete YAML snippets:
+
+| File | Resource Type | When to Use |
+|------|--------------|-------------|
+| `uc-function.yaml` | Unity Catalog function | UC functions via MCP |
+| `uc-connection.yaml` | UC connection | External MCP servers |
+| `vector-search.yaml` | Vector search index | RAG applications |
+| `sql-warehouse.yaml` | SQL warehouse | SQL execution |
+| `serving-endpoint.yaml` | Model serving endpoint | Model inference |
+| `genie-space.yaml` | Genie space | Natural language data |
+| `lakebase.yaml` | Lakebase database | Agent memory storage (provisioned) |
+| `lakebase-autoscaling.yaml` | Lakebase autoscaling postgres | Agent memory storage (autoscaling) |
+| `experiment.yaml` | MLflow experiment | Tracing (already configured) |
+| `app.yaml` | Databricks App (app-to-app) | Custom MCP servers hosted as Apps |
+| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` |
+
+## Custom MCP Servers (Databricks Apps)
+
+Declare the target app as an `app` resource in `databricks.yml` — the bundle grants `CAN_USE` on deploy. Requires Databricks CLI **v0.298.0+**.
+
+```yaml
+resources:
+  apps:
+    agent_langgraph:
+      resources:
+        - name: 'mcp_server'
+          app:
+            name: 'mcp-my-server'
+            permission: CAN_USE
+```
+
+See `examples/custom-mcp-server.md` for the full flow (agent code + YAML + deploy).
+
+## value_from Pattern
+
+**IMPORTANT**: Make sure every `value_from` reference in `databricks.yml` `config.env` matches the `name` of an existing entry in the `databricks.yml` `resources` list.
+Some resources need environment variables in your app. Use `value_from` in `databricks.yml` `config.env` to reference resources defined in `databricks.yml`:
+
+```yaml
+# In databricks.yml, under apps.<app>.config.env:
+env:
+  - name: MLFLOW_EXPERIMENT_ID
+    value_from: "experiment"        # References resources.apps.<app>.resources[name='experiment']
+  - name: LAKEBASE_INSTANCE_NAME
+    value_from: "database"   # References resources.apps.<app>.resources[name='database']
+```
+
+**Critical:** Every `value_from` value must match a `name` field in `databricks.yml` resources.
+
+## MCP Error Handling
+
+MCP tool calls can fail (network issues, permission errors, timeouts). Use `handle_tool_error` on MCP servers to catch errors and return them to the LLM instead of crashing the agent:
+
+```python
+DatabricksMCPServer(
+    name="genie",
+    url=f"{host}/api/2.0/mcp/genie/{space_id}",
+    handle_tool_error=True,   # Return error messages to LLM instead of raising
+    timeout=60.0,             # Increase timeout for slow tools like Genie
+)
+```
+
+For local function tools defined with `@tool`, see `create-tools` skill > `examples/local-python-tools.md` for the `ToolException` + `handle_tool_error` pattern.
+
+## Important Notes
+
+- **MLflow experiment**: Already configured in template, no action needed
+- **Multiple resources**: Add multiple entries under `resources:` list
+- **Permission types vary**: Each resource type has specific permission values
+- **Deploy + Run after changes**: Run both `databricks bundle deploy` AND `databricks bundle run agent_langgraph`
+- **value_from matching**: Ensure `config.env` `value_from` values match `databricks.yml` resource `name` values
diff --git a/skills/add-tools-langgraph/agents/openai.yaml b/skills/add-tools-langgraph/agents/openai.yaml
new file mode 100644
index 0000000..f194c24
--- /dev/null
+++ b/skills/add-tools-langgraph/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Add Tools (LangGraph)"
+  short_description: "Add tools and permissions for LangGraph agents"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $add-tools-langgraph for adding tools and permissions to a LangGraph agent."
diff --git a/skills/add-tools-langgraph/assets/databricks.png b/skills/add-tools-langgraph/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/add-tools-langgraph/assets/databricks.png differ
diff --git a/skills/add-tools-langgraph/assets/databricks.svg b/skills/add-tools-langgraph/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/add-tools-langgraph/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/add-tools-openai/SKILL.md b/skills/add-tools-openai/SKILL.md
new file mode 100644
index 0000000..c43eeb3
--- /dev/null
+++ b/skills/add-tools-openai/SKILL.md
@@ -0,0 +1,104 @@
+---
+name: add-tools-openai
+description: "Add tools to your OpenAI Agents SDK agent and grant required permissions in databricks.yml. Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources."
+metadata:
+  version: "0.0.1"
+---
+
+# Add Tools & Grant Permissions
+
+> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks <command> --profile <profile>`
+
+> Don't have the resource yet? See **create-tools** skill first.
+
+**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.**
+
+Without this, you'll get permission errors when the agent tries to use the resource.
+
+## Workflow
+
+**Step 1:** Add MCP server in `agent_server/agent.py`:
+```python
+from databricks_openai.agents import McpServer
+
+genie_server = McpServer(
+    url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef",
+    name="my genie space",
+)
+
+agent = Agent(
+    name="my agent",
+    model="databricks-claude-3-7-sonnet",
+    mcp_servers=[genie_server],
+)
+```
+
+**Step 2:** Grant access in `databricks.yml`:
+```yaml
+resources:
+  apps:
+    {{BUNDLE_NAME}}:
+      resources:
+        - name: 'my_genie_space'
+          genie_space:
+            name: 'My Genie Space'
+            space_id: '01234567-89ab-cdef'
+            permission: 'CAN_RUN'
+```
+
+**Step 3:** Deploy with `databricks bundle deploy`, then run `databricks bundle run {{BUNDLE_NAME}}` to start the app with the new code (see **deploy** skill)
+
+## Resource Type Examples
+
+See the `examples/` directory for complete YAML snippets:
+
+| File | Resource Type | When to Use |
+|------|--------------|-------------|
+| `uc-function.yaml` | Unity Catalog function | UC functions |
+| `uc-connection.yaml` | UC connection | External MCP servers |
+| `vector-search.yaml` | Vector search index | RAG applications |
+| `sql-warehouse.yaml` | SQL warehouse | SQL execution |
+| `serving-endpoint.yaml` | Model serving endpoint | Model inference |
+| `genie-space.yaml` | Genie space | Natural language data |
+| `lakebase-autoscaling.yaml` | Lakebase autoscaling postgres | Agent memory storage (autoscaling) |
+| `experiment.yaml` | MLflow experiment | Tracing (already configured) |
+| `app.yaml` | Databricks App (app-to-app) | Custom MCP servers hosted as Apps |
+| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` |
+
+## Custom MCP Servers (Databricks Apps)
+
+Declare the target app as an `app` resource in `databricks.yml` — the bundle grants `CAN_USE` on deploy. Requires Databricks CLI **v0.298.0+**.
+
+```yaml
+resources:
+  apps:
+    {{BUNDLE_NAME}}:
+      resources:
+        - name: 'mcp_server'
+          app:
+            name: 'mcp-my-server'
+            permission: CAN_USE
+```
+
+See `examples/custom-mcp-server.md` for the full flow (agent code + YAML + deploy).
+
+## MCP Error Handling
+
+MCP tool calls can fail (network issues, permission errors, timeouts). The OpenAI Agents SDK catches tool errors by default and returns the error message to the LLM. To customize timeout behavior for MCP servers:
+
+```python
+mcp_server = McpServer(
+    url=f"{host}/api/2.0/mcp/genie/{space_id}",
+    name="genie",
+    timeout=60.0,  # Increase timeout for slow tools like Genie (default: 20s)
+)
+```
+
+For local function tools, see `create-tools` skill > `examples/local-python-tools.md` for `failure_error_function` patterns.
+
+## Important Notes
+
+- **MLflow experiment**: Already configured in template, no action needed
+- **Multiple resources**: Add multiple entries under `resources:` list
+- **Permission types vary**: Each resource type has specific permission values
+- **Deploy + Run after changes**: Run `databricks bundle deploy` after modifying `databricks.yml`, then `databricks bundle run {{BUNDLE_NAME}}` to start the app with the new code
diff --git a/skills/add-tools-openai/agents/openai.yaml b/skills/add-tools-openai/agents/openai.yaml
new file mode 100644
index 0000000..6153080
--- /dev/null
+++ b/skills/add-tools-openai/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Add Tools (OpenAI)"
+  short_description: "Add tools and permissions for OpenAI agents"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $add-tools-openai for adding tools and permissions to an OpenAI Agents SDK agent."
diff --git a/skills/add-tools-openai/assets/databricks.png b/skills/add-tools-openai/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/add-tools-openai/assets/databricks.png differ
diff --git a/skills/add-tools-openai/assets/databricks.svg b/skills/add-tools-openai/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/add-tools-openai/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/agent-langgraph-memory/SKILL.md b/skills/agent-langgraph-memory/SKILL.md
new file mode 100644
index 0000000..46d2e58
--- /dev/null
+++ b/skills/agent-langgraph-memory/SKILL.md
@@ -0,0 +1,387 @@
+---
+name: agent-langgraph-memory
+description: "Add memory capabilities to your LangGraph agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage."
+metadata:
+  version: "0.0.1"
+---
+
+# Adding Memory to Your Agent
+
+> **Note:** This template does not include memory by default. Use this skill to **add memory capabilities**. For a pre-configured memory template, see:
+> - [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) - Short-term and long-term memory with long-running background tasks
+
+## Memory Types
+
+| Type | Use Case | Storage | Identifier |
+|------|----------|---------|------------|
+| **Short-term** | Conversation history within a session | `AsyncCheckpointSaver` | `thread_id` |
+| **Long-term** | User facts that persist across sessions | `AsyncDatabricksStore` | `user_id` |
+
+## Prerequisites
+
+1. **Add memory dependency** to `pyproject.toml`:
+   ```toml
+   dependencies = [
+       "databricks-langchain[memory]",
+   ]
+   ```
+
+   Then run `uv sync`
+
+2. **Configure Lakebase** - See **lakebase-setup** skill for:
+   - Creating/configuring Lakebase instance
+   - Initializing tables (CRITICAL first-time step)
+
+---
+
+## Quick Setup Summary
+
+Adding memory requires changes to **4 files**:
+
+| File | What to Add |
+|------|-------------|
+| `pyproject.toml` | Memory dependency |
+| `.env` | Lakebase env vars (for local dev) |
+| `databricks.yml` | Lakebase database resource + env vars in config block |
+| `agent_server/agent.py` | Memory tools and AsyncDatabricksStore |
+
+---
+
+## Key Principles
+
+Before implementing memory, understand these patterns from the production implementation.
+
+### 1. Factory Function Pattern
+
+Memory tools should be returned from a factory function, not defined as standalone functions:
+
+```python
+def memory_tools():
+    @tool
+    async def get_user_memory(query: str, config: RunnableConfig) -> str:
+        ...
+    @tool
+    async def save_user_memory(memory_key: str, memory_data_json: str, config: RunnableConfig) -> str:
+        ...
+    @tool
+    async def delete_user_memory(memory_key: str, config: RunnableConfig) -> str:
+        ...
+    return [get_user_memory, save_user_memory, delete_user_memory]
+```
+
+### 2. User ID Extraction
+
+Extract `user_id` from the request, checking `custom_inputs` first. Return `None` (not a default) to let the caller decide:
+
+```python
+def get_user_id(request: ResponsesAgentRequest) -> Optional[str]:
+    custom_inputs = dict(request.custom_inputs or {})
+    if "user_id" in custom_inputs:
+        return custom_inputs["user_id"]
+    if request.context and getattr(request.context, "user_id", None):
+        return request.context.user_id
+    return None
+```
+
+### 3. Separate Error Handling
+
+Check `user_id` and `store` separately with distinct error messages:
+
+```python
+user_id = config.get("configurable", {}).get("user_id")
+if not user_id:
+    return "Memory not available - no user_id provided."
+
+store: Optional[BaseStore] = config.get("configurable", {}).get("store")
+if not store:
+    return "Memory not available - store not configured."
+```
+
+### 4. JSON Validation for Save
+
+Validate JSON input before storing - the LLM may pass invalid JSON:
+
+```python
+try:
+    memory_data = json.loads(memory_data_json)
+    if not isinstance(memory_data, dict):
+        return f"Failed: memory_data must be a JSON object, not {type(memory_data).__name__}"
+    await store.aput(namespace, memory_key, memory_data)
+except json.JSONDecodeError as e:
+    return f"Failed to save memory: Invalid JSON - {e}"
+```
+
+### 5. Pass Store via RunnableConfig
+
+Pass the store through config, not as a function parameter:
+
+```python
+config = {"configurable": {"user_id": user_id, "store": store}}
+# Tools access via: config.get("configurable", {}).get("store")
+```
+
+---
+
+## Complete Example
+
+A full implementation is available in this skill's examples folder:
+
+```bash
+# Copy to your project
+cp .claude/skills/agent-langgraph-memory/examples/memory_tools.py agent_server/
+```
+
+See `examples/memory_tools.py` for production-ready code including all helper functions.
+
+## Production Reference
+
+For implementations in the pre-built templates:
+
+| File | Description |
+|------|-------------|
+| [`agent-langgraph-advanced/agent_server/utils_memory.py`](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced/agent_server/utils_memory.py) | Memory tools factory, helpers, error handling |
+| [`agent-langgraph-advanced/agent_server/agent.py`](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced/agent_server/agent.py) | Integration with agent, store initialization |
+
+Key functions:
+- `memory_tools()` - Factory returning get/save/delete tools
+- `get_user_id()` - Extract user_id from request
+- `resolve_lakebase_instance_name()` - Handle hostname vs instance name
+- `get_lakebase_access_error_message()` - Helpful error messages
+
+---
+
+## Configuration Files
+
+### Step 1: databricks.yml (Lakebase Resource)
+
+Add the Lakebase database resource to your app:
+
+```yaml
+resources:
+  apps:
+    agent_langgraph:
+      name: "your-app-name"
+      source_code_path: ./
+
+      resources:
+        # ... other resources (experiment, UC functions, etc.) ...
+
+        # Lakebase instance for long-term memory
+        - name: 'database'
+          database:
+            instance_name: '<your-lakebase-instance-name>'
+            database_name: 'databricks_postgres'
+            permission: 'CAN_CONNECT_AND_CREATE'
+```
+
+**Important:** The `name: 'database'` must match the `value_from` reference in the `databricks.yml` `config.env` block.
+
+### Step 2: databricks.yml config block (Environment Variables)
+
+Add the Lakebase environment variables to your app's `config.env` in `databricks.yml`:
+
+```yaml
+      config:
+        command: ["uv", "run", "start-app"]
+        env:
+          # ... other env vars ...
+
+          # Lakebase instance name (resolved from database resource)
+          - name: LAKEBASE_INSTANCE_NAME
+            value_from: "database"
+
+          # Embedding configuration
+          - name: EMBEDDING_ENDPOINT
+            value: "databricks-gte-large-en"
+          - name: EMBEDDING_DIMS
+            value: "1024"
+```
+
+**Important:** `LAKEBASE_INSTANCE_NAME` uses `value_from: "database"` to resolve from the database resource at deploy time.
+
+### Step 3: .env (Local Development)
+
+```bash
+# Lakebase configuration for long-term memory
+LAKEBASE_INSTANCE_NAME=<your-instance-name>
+EMBEDDING_ENDPOINT=databricks-gte-large-en
+EMBEDDING_DIMS=1024
+```
+
+---
+
+## Integration Example
+
+Minimal example showing how to integrate memory into your streaming function:
+
+```python
+from agent_server.utils_memory import memory_tools, get_user_id
+
+@stream()
+async def streaming(request: ResponsesAgentRequest):
+    user_id = get_user_id(request)
+
+    async with AsyncDatabricksStore(
+        instance_name=LAKEBASE_INSTANCE_NAME,
+        embedding_endpoint=EMBEDDING_ENDPOINT,
+        embedding_dims=EMBEDDING_DIMS,
+    ) as store:
+        await store.setup()  # Creates tables if needed
+
+        tools = await mcp_client.get_tools() + memory_tools()
+        config = {"configurable": {"user_id": user_id, "store": store}}
+
+        agent = create_react_agent(model=model, tools=tools)
+        async for event in agent.astream(messages, config):
+            yield event
+```
+
+---
+
+## Initialize Tables and Deploy
+
+### Initialize Lakebase Tables (First Time Only)
+
+Before deploying, initialize the tables locally:
+
+```bash
+uv run python -c "$(cat <<'EOF'
+import asyncio
+from databricks_langchain import AsyncDatabricksStore
+
+async def setup():
+    async with AsyncDatabricksStore(
+        instance_name="<your-instance-name>",
+        embedding_endpoint="databricks-gte-large-en",
+        embedding_dims=1024,
+    ) as store:
+        await store.setup()
+        print("Tables created!")
+
+asyncio.run(setup())
+EOF
+)"
+```
+
+### Deploy
+
+After initializing tables, deploy your agent. See **deploy** skill for full instructions.
+
+---
+
+## Short-Term Memory
+
+For conversation history within a session, use `AsyncCheckpointSaver`:
+
+```python
+from databricks_langchain import AsyncCheckpointSaver
+
+async with AsyncCheckpointSaver(instance_name=LAKEBASE_INSTANCE_NAME) as checkpointer:
+    agent = create_react_agent(
+        model=model,
+        tools=tools,
+        checkpointer=checkpointer,
+    )
+
+    config = {"configurable": {"thread_id": thread_id}}
+    async for event in agent.astream(messages, config):
+        yield event
+```
+
+See the [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) template for a complete implementation.
+
+---
+
+## Testing Memory
+
+### Test Locally
+
+```bash
+# Start the server
+uv run start-app
+
+# Save a memory
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "Remember that I am on the shipping team"}],
+      "custom_inputs": {"user_id": "alice@example.com"}
+  }'
+
+# Recall the memory
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "What team am I on?"}],
+      "custom_inputs": {"user_id": "alice@example.com"}
+  }'
+
+# Delete a memory
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "Forget what team I am on"}],
+      "custom_inputs": {"user_id": "alice@example.com"}
+  }'
+```
+
+### Test Deployed App
+
+```bash
+# Get OAuth token (PATs don't work for apps)
+TOKEN=$(databricks auth token --host <workspace-url> | jq -r '.access_token')
+
+# Test memory save
+curl -X POST https://<app-url>/invocations \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "Remember I prefer detailed explanations"}],
+      "custom_inputs": {"user_id": "alice@example.com"}
+  }'
+```
+
+---
+
+## First-Time Setup Checklist
+
+- [ ] Added `databricks-langchain[memory]` to `pyproject.toml`
+- [ ] Ran `uv sync` to install dependencies
+- [ ] Created or identified Lakebase instance
+- [ ] Added Lakebase env vars to `.env` (for local dev)
+- [ ] Added `database` resource to `databricks.yml`
+- [ ] Added `LAKEBASE_INSTANCE_NAME` to `databricks.yml` `config.env`
+- [ ] **Initialized tables locally** by running `await store.setup()`
+- [ ] Deployed with `databricks bundle deploy && databricks bundle run agent_langgraph`
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| **"embedding_dims is required"** | Missing parameter | Add `embedding_dims=1024` to AsyncDatabricksStore |
+| **"relation 'store' does not exist"** | Tables not created | Run `await store.setup()` locally first |
+| **"Unable to resolve Lakebase instance 'None'"** | Missing env var | Check `LAKEBASE_INSTANCE_NAME` in databricks.yml `config.env` |
+| **"permission denied for table store"** | Missing grants | Add `database` resource to databricks.yml |
+| **"Memory not available - no user_id"** | Missing user_id | Pass `custom_inputs.user_id` in request |
+| **Memory not persisting** | Different user_ids | Use consistent user_id across requests |
+| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run agent_langgraph` after deploy |
+
+---
+
+## Pre-Built Memory Templates
+
+For a fully configured implementation that needs no manual setup, start from a pre-built template:
+
+| Template | Memory Type | Key Features |
+|----------|-------------|--------------|
+| [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) | Short-term + Long-term | AsyncCheckpointSaver, AsyncDatabricksStore, memory tools |
+
+---
+
+## Next Steps
+
+- Configure Lakebase: see **lakebase-setup** skill
+- Test locally: see **run-locally** skill
+- Deploy: see **deploy** skill
diff --git a/skills/agent-langgraph-memory/agents/openai.yaml b/skills/agent-langgraph-memory/agents/openai.yaml
new file mode 100644
index 0000000..36e08a8
--- /dev/null
+++ b/skills/agent-langgraph-memory/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Agent Memory (LangGraph)"
+  short_description: "Add memory to a LangGraph agent"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $agent-langgraph-memory for adding memory and conversation persistence to a LangGraph agent."
diff --git a/skills/agent-langgraph-memory/assets/databricks.png b/skills/agent-langgraph-memory/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/agent-langgraph-memory/assets/databricks.png differ
diff --git a/skills/agent-langgraph-memory/assets/databricks.svg b/skills/agent-langgraph-memory/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/agent-langgraph-memory/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/agent-openai-memory/SKILL.md b/skills/agent-openai-memory/SKILL.md
new file mode 100644
index 0000000..8f6f7b4
--- /dev/null
+++ b/skills/agent-openai-memory/SKILL.md
@@ -0,0 +1,178 @@
+---
+name: agent-openai-memory
+description: "Add memory capabilities to your agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage."
+metadata:
+  version: "0.0.1"
+---
+
+# Stateful Memory with OpenAI Agents SDK Sessions
+
+This template uses OpenAI Agents SDK [Sessions](https://openai.github.io/openai-agents-python/sessions/) with `AsyncDatabricksSession` to persist conversation history to a Databricks Lakebase instance.
+
+## How Sessions Work
+
+Sessions automatically manage conversation history for multi-turn interactions:
+
+1. **Before each run**: The session retrieves prior conversation history and prepends it to input
+2. **During the run**: New items (user messages, responses, tool calls) are generated
+3. **After each run**: All new items are automatically stored in the session
+
+This eliminates the need to manually manage conversation state between runs.
+
+## Key Concepts
+
+| Concept | Description |
+|---------|-------------|
+| **Session** | Stores conversation history for a specific `session_id` |
+| **`session_id`** | Unique identifier linking requests to the same conversation |
+| **`AsyncDatabricksSession`** | Session implementation backed by Databricks Lakebase |
+| **`LAKEBASE_INSTANCE_NAME`** | Environment variable specifying the Lakebase instance |
+
+## How This Template Uses Sessions
+
+### Session Creation (`agent_server/agent.py`)
+
+```python
+from databricks_openai.agents import AsyncDatabricksSession
+
+session = AsyncDatabricksSession(
+    session_id=get_session_id(request),
+    instance_name=LAKEBASE_INSTANCE_NAME,
+)
+
+result = await Runner.run(agent, messages, session=session)
+```
+
+### Session ID Extraction (`agent_server/agent.py`)
+
+The `session_id` is extracted from `custom_inputs` or auto-generated:
+
+```python
+def get_session_id(request: ResponsesAgentRequest) -> str:
+    if hasattr(request, "custom_inputs") and request.custom_inputs:
+        if "session_id" in request.custom_inputs:
+            return request.custom_inputs["session_id"]
+    return str(uuid7())
+```
+
+### Lakebase Instance Resolution (`agent_server/utils.py`)
+
+The `LAKEBASE_INSTANCE_NAME` env var can be either an instance name or a hostname. The `resolve_lakebase_instance_name()` function handles both cases:
+
+```python
+_LAKEBASE_INSTANCE_NAME_RAW = os.environ.get("LAKEBASE_INSTANCE_NAME")
+LAKEBASE_INSTANCE_NAME = resolve_lakebase_instance_name(_LAKEBASE_INSTANCE_NAME_RAW)
+```
+
+---
+
+## Prerequisites
+
+1. **Dependency**: `databricks-openai[memory]` must be in `pyproject.toml` (already included)
+
+2. **Lakebase instance**: You need a Databricks Lakebase instance. See the **lakebase-setup** skill for creating and configuring one.
+
+3. **Environment variable**: Set `LAKEBASE_INSTANCE_NAME` in your `.env` file:
+   ```bash
+   LAKEBASE_INSTANCE_NAME=<your-lakebase-instance-name>
+   ```
+
+---
+
+## Configuration Files
+
+### databricks.yml (Lakebase Resource)
+
+Add the Lakebase database resource to your app:
+
+```yaml
+resources:
+  apps:
+    agent_openai_advanced:
+      name: "your-app-name"
+      source_code_path: ./
+
+      resources:
+        # ... other resources (experiment, etc.) ...
+
+        # Lakebase instance for session storage
+        - name: 'database'
+          database:
+            instance_name: '<your-lakebase-instance-name>'
+            database_name: 'databricks_postgres'
+            permission: 'CAN_CONNECT_AND_CREATE'
+```
+
+### databricks.yml config block (Environment Variables)
+
+The `LAKEBASE_INSTANCE_NAME` env var is resolved from the database resource at deploy time. Add to your app's `config.env` in `databricks.yml`:
+
+```yaml
+      config:
+        env:
+          - name: LAKEBASE_INSTANCE_NAME
+            value_from: "database"
+```
+
+### .env (Local Development)
+
+```bash
+LAKEBASE_INSTANCE_NAME=<your-lakebase-instance-name>
+```
+
+---
+
+## Testing Sessions
+
+### Test Multi-Turn Conversation Locally
+
+```bash
+# Start the server
+uv run start-app
+
+# First message - starts a new session
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hello, I live in SF!"}]}'
+
+# Note the session_id from custom_outputs in the response
+
+# Second message - continues the same session
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "What city did I say I live in?"}],
+      "custom_inputs": {"session_id": "<session_id from previous response>"}
+  }'
+```
+
+### Test Streaming
+
+```bash
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "Hello!"}],
+      "stream": true
+  }'
+```
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| **"LAKEBASE_INSTANCE_NAME environment variable is required"** | Missing env var | Set `LAKEBASE_INSTANCE_NAME` in `.env` |
+| **SSL connection closed unexpectedly** | Network/instance issue | Verify Lakebase instance is running: `databricks lakebase instances get <name>` |
+| **Agent doesn't remember previous messages** | Different session_id | Pass the same `session_id` via `custom_inputs` across requests |
+| **"Unable to resolve hostname"** | Hostname doesn't match any instance | Verify the hostname or use the instance name directly |
+| **Permission denied** | Missing Lakebase access | Add `database` resource to `databricks.yml` with `CAN_CONNECT_AND_CREATE` |
+
+---
+
+## Next Steps
+
+- Configure Lakebase: see **lakebase-setup** skill
+- Test locally: see **run-locally** skill
+- Deploy: see **deploy** skill
diff --git a/skills/agent-openai-memory/agents/openai.yaml b/skills/agent-openai-memory/agents/openai.yaml
new file mode 100644
index 0000000..7706284
--- /dev/null
+++ b/skills/agent-openai-memory/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Agent Memory (OpenAI)"
+  short_description: "Add memory to an OpenAI agent"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $agent-openai-memory for adding memory and conversation persistence to an OpenAI Agents SDK agent."
diff --git a/skills/agent-openai-memory/assets/databricks.png b/skills/agent-openai-memory/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/agent-openai-memory/assets/databricks.png differ
diff --git a/skills/agent-openai-memory/assets/databricks.svg b/skills/agent-openai-memory/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/agent-openai-memory/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/create-tools/SKILL.md b/skills/create-tools/SKILL.md
new file mode 100644
index 0000000..e3cc076
--- /dev/null
+++ b/skills/create-tools/SKILL.md
@@ -0,0 +1,28 @@
+---
+name: create-tools
+description: "Create Databricks resources that agents connect to as tools. Use when: (1) User needs to create a Genie space, vector search index, UC function, or UC connection, (2) User says 'create tool', 'set up genie', 'create vector search', 'register MCP server', (3) Before add-tools when the resource doesn't exist yet, (4) User asks 'what do I need to create before adding this tool'."
+metadata:
+  version: "0.0.1"
+---
+
+# Create Tool Resources
+
+> This skill covers creating the Databricks resources your agent connects to.
+> After creating a resource, use the **add-tools** skill to wire it into your agent and grant permissions.
+
+## Which resource do you need?
+
+| I want my agent to... | Resource to create | Guide |
+|---|---|---|
+| Answer questions about structured data | Genie space | `examples/genie-space.md` |
+| Search documents / RAG | Vector Search index | `examples/vector-search-index.md` |
+| Call custom SQL/Python logic | UC function | `examples/uc-function.md` |
+| Connect to an external MCP server | UC connection | `examples/uc-connection.md` |
+| Add inline Python tools | Local function tools | `examples/local-python-tools.md` |
+
+## Workflow
+
+1. **Discover** existing resources: `uv run discover-tools` (see **discover-tools** skill)
+2. **Create** the resource if it doesn't exist (this skill)
+3. **Add** the MCP server to your agent code + grant permissions (see **add-tools** skill)
+4. **Deploy** (see **deploy** skill)
diff --git a/skills/create-tools/agents/openai.yaml b/skills/create-tools/agents/openai.yaml
new file mode 100644
index 0000000..a798bd3
--- /dev/null
+++ b/skills/create-tools/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Create Tools"
+  short_description: "Create Databricks resources to use as tools"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $create-tools for creating Databricks resources (Genie spaces, vector search, UC functions) for use as agent tools."
diff --git a/skills/create-tools/assets/databricks.png b/skills/create-tools/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/create-tools/assets/databricks.png differ
diff --git a/skills/create-tools/assets/databricks.svg b/skills/create-tools/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/create-tools/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/deploy/SKILL.md b/skills/deploy/SKILL.md
new file mode 100644
index 0000000..4c31ae0
--- /dev/null
+++ b/skills/deploy/SKILL.md
@@ -0,0 +1,250 @@
+---
+name: deploy
+description: "Deploy agent to Databricks Apps using DAB (Databricks Asset Bundles). Use when: (1) User says 'deploy', 'push to databricks', or 'bundle deploy', (2) 'App already exists' error occurs, (3) Need to bind/unbind existing apps, (4) Debugging deployed apps, (5) Querying deployed app endpoints."
+metadata:
+  version: "0.0.1"
+---
+
+# Deploy to Databricks Apps
+
+## Profile Configuration
+
+**IMPORTANT:** Before running any `databricks` CLI command, read the `.env` file to get the `DATABRICKS_CONFIG_PROFILE` value. All commands must include the profile:
+
+```bash
+databricks <command> --profile <profile>
+```
+
+For example, if `.env` has `DATABRICKS_CONFIG_PROFILE=dev`, run `databricks bundle deploy --profile dev`. Without this, the CLI may target the wrong workspace.
+
+## App Naming Convention
+
+Unless the user specifies a different name, apps should use the prefix `agent-*`:
+- `agent-data-analyst`
+- `agent-customer-support`
+- `agent-code-helper`
+
+Update the app name in `databricks.yml`:
+```yaml
+resources:
+  apps:
+    {{BUNDLE_NAME}}:
+      name: "agent-your-app-name"  # Use agent-* prefix
+```
+
+## Deploy Commands
+
+**IMPORTANT:** Run the pre-flight check before deploying to catch errors early, then validate, deploy, and run the bundle to start your app:
+
+```bash
+# 1. Pre-flight check (starts server locally, sends test request, verifies response)
+uv run preflight
+
+# 2. Validate bundle configuration (catches config errors before deploy)
+databricks bundle validate
+
+# 3. Deploy the bundle (creates/updates resources, uploads files)
+databricks bundle deploy
+
+# 4. Run the app (starts/restarts with uploaded source code) - REQUIRED!
+databricks bundle run {{BUNDLE_NAME}}
+```
+
+> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code!
+
+`{{BUNDLE_NAME}}` is the resource key under `resources.apps` in `databricks.yml` (the YAML key that holds the app definition), not the app's `name` field.
+
+## Handling "App Already Exists" Error
+
+If `databricks bundle deploy` fails with:
+```
+Error: failed to create app
+Failed to create app <app-name>. An app with the same name already exists.
+```
+
+**Ask the user:** "Would you like to bind the existing app to this bundle, or delete it and create a new one?"
+
+### Option 1: Bind Existing App (Recommended)
+
+**Step 1:** Get the existing app's full configuration:
+```bash
+# Get app config including budget_policy_id and other server-side settings
+databricks apps get <existing-app-name> --output json | jq '{name, budget_policy_id, description}'
+```
+
+**Step 2:** Update `databricks.yml` to match the existing app's configuration exactly:
+```yaml
+resources:
+  apps:
+    {{BUNDLE_NAME}}:
+      name: "existing-app-name"  # Must match exactly
+      budget_policy_id: "xxx-xxx-xxx"  # Copy from step 1 if present
+```
+
+> **Why this matters:** Existing apps may have server-side configuration (like `budget_policy_id`) that isn't in your bundle. If these don't match, Terraform will fail with "Provider produced inconsistent result after apply". Always sync the app's current config to `databricks.yml` before binding.
+
+**Step 3:** If deploying to a `mode: production` target, set `workspace.root_path`:
+```yaml
+targets:
+  prod:
+    mode: production
+    workspace:
+      root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target}
+```
+
+> **Why this matters:** Production mode requires an explicit root path to ensure only one copy of the bundle is deployed. Without this, the deploy will fail with a recommendation to set `workspace.root_path`.
+
+**Step 4:** Check if already bound, then bind if needed:
+```bash
+# Check if resource is already managed by this bundle
+databricks bundle summary --output json | jq '.resources.apps'
+
+# If the app appears in the summary, skip binding and go to Step 5
+# If NOT in summary, bind the resource:
+databricks bundle deployment bind {{BUNDLE_NAME}} <existing-app-name> --auto-approve
+```
+
+> **Note:** If bind fails with "Resource already managed by Terraform", the app is already bound to this bundle. Skip to Step 5 and deploy directly.
+
+**Step 5:** Deploy:
+```bash
+databricks bundle deploy
+databricks bundle run {{BUNDLE_NAME}}
+```
+
+### Option 2: Delete and Recreate
+
+```bash
+databricks apps delete <app-name>
+databricks bundle deploy
+```
+
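+**Warning:** This permanently deletes the app's URL, OAuth credentials, and service principal. After the new app is created, run `databricks bundle run {{BUNDLE_NAME}}` to start it, since `bundle deploy` alone does not start the app.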
+
+## Unbinding an App
+
+To remove the link between bundle and deployed app:
+
+```bash
+databricks bundle deployment unbind {{BUNDLE_NAME}}
+```
+
+Use when:
+- Switching to a different app
+- Letting bundle create a new app
+- Switching between deployed instances
+
+Note: Unbinding doesn't delete the deployed app.
+
+## Query Deployed App
+
+> **IMPORTANT:** Databricks Apps are **only** queryable via OAuth token. You **cannot** use a Personal Access Token (PAT) to query your agent. Attempting to use a PAT will result in a 302 redirect error.
+
+**Get OAuth token:**
+```bash
+databricks auth token | jq -r '.access_token'
+```
+
+**Send request:**
+```bash
+curl -X POST <app-url>/invocations \
+  -H "Authorization: Bearer <oauth-token>" \
+  -H "Content-Type: application/json" \
+  -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }'
+```
+
+**If using memory** - include `user_id` to scope memories per user:
+```bash
+curl -X POST <app-url>/invocations \
+  -H "Authorization: Bearer <oauth-token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+      "input": [{"role": "user", "content": "What do you remember about me?"}],
+      "custom_inputs": {"user_id": "user@example.com"}
+  }'
+```
+
+## On-Behalf-Of (OBO) User Authentication
+
+To authenticate as the requesting user instead of the app service principal:
+
+```python
+from agent_server.utils import get_user_workspace_client
+
+# In your agent code
+user_client = get_user_workspace_client()
+# Use user_client for operations that should run as the user
+```
+
+This is useful when you want the agent to access resources with the user's permissions rather than the app's service principal permissions.
+
+See: [OBO authentication documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#retrieve-user-authorization-credentials)
+
+## Debug Deployed Apps
+
+```bash
+# View logs (follow mode)
+databricks apps logs <app-name> --follow
+
+# Check app status
+databricks apps get <app-name> --output json | jq '{app_status, compute_status}'
+
+# Get app URL
+databricks apps get <app-name> --output json | jq -r '.url'
+```
+
+## Post-Deploy: Autoscaling Lakebase Resources
+
+If the agent uses **autoscaling Lakebase** (user mentions "autoscaling", "project", or "branch" in the context of Lakebase), the postgres resource is declared natively in `databricks.yml` — `databricks bundle deploy` creates the app with it. You only need to grant table permissions to the app's service principal after deploy:
+
+```bash
+# Find the SP client ID
+databricks apps get <name> --output json | jq -r '.service_principal_client_id'
+
+# Grant table permissions (see scripts/grant_lakebase_permissions.py)
+```
+
+**See `.claude/skills/add-tools/examples/lakebase-autoscaling.yaml` for the full resource snippet.** Requires CLI v0.295.0+ for native `postgres` resource support.
+
+## Important Notes
+
+- **App naming convention**: Unless the user specifies a different name, prefix app names with `agent-` (e.g., `agent-my-assistant`, `agent-data-analyst`)
+- **Name is immutable**: Changing the `name` field in `databricks.yml` forces app replacement (destroy + create)
+- **Remote Terraform state**: Databricks stores bundle state remotely, so the same app is detected even when you deploy from a different directory
+- **Review the plan**: Look for `# forces replacement` in Terraform output before confirming
+
+## FAQ
+
+**Q: I see a 200 OK in the logs, but get an error in the actual stream. What's going on?**
+
+This is expected behavior. The initial 200 OK confirms stream setup was successful. Errors that occur during streaming don't affect the initial HTTP status code. Check the stream content for the actual error message.
+
+**Q: When querying my agent, I get a 302 redirect error. What's wrong?**
+
+You're likely using a Personal Access Token (PAT). Databricks Apps only support OAuth tokens. Generate one with:
+```bash
+databricks auth token
+```
+
+**Q: How do I add dependencies to my agent?**
+
+Use `uv add`:
+```bash
+uv add <package_name>
+# Example: uv add "mlflow-skinny[databricks]"
+```
+
+## Troubleshooting
+
+| Issue | Solution |
+|-------|----------|
+| Validation errors | Run `databricks bundle validate` to see detailed errors before deploying |
+| Permission errors at runtime | Grant resources in `databricks.yml` (see **add-tools** skill) |
+| Lakebase access errors | See **lakebase-setup** skill for permissions (if using memory) |
+| App not starting | Check `databricks apps logs <app-name>` |
+| Auth token expired | Run `databricks auth token` again |
+| 302 redirect error | Use OAuth token, not PAT |
+| "Provider produced inconsistent result" | Sync app config to `databricks.yml` |
+| "should set workspace.root_path" | Add `root_path` to production target |
+| App running old code after deploy | Run `databricks bundle run {{BUNDLE_NAME}}` after deploy |
+| Env var is None in deployed app | Check `value_from` in databricks.yml `config.env` matches resource `name` |
diff --git a/skills/deploy/agents/openai.yaml b/skills/deploy/agents/openai.yaml
new file mode 100644
index 0000000..63f3c2f
--- /dev/null
+++ b/skills/deploy/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Deploy Agent App"
+  short_description: "Deploy agent app via DAB"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $deploy for deploying an agent to Databricks Apps via Databricks Asset Bundles."
diff --git a/skills/deploy/assets/databricks.png b/skills/deploy/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/deploy/assets/databricks.png differ
diff --git a/skills/deploy/assets/databricks.svg b/skills/deploy/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/deploy/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/discover-tools/SKILL.md b/skills/discover-tools/SKILL.md
new file mode 100644
index 0000000..d7f7d82
--- /dev/null
+++ b/skills/discover-tools/SKILL.md
@@ -0,0 +1,51 @@
+---
+name: discover-tools
+description: "Discover available tools and resources in Databricks workspace. Use when: (1) User asks 'what tools are available', (2) Before writing agent code, (3) Looking for MCP servers, Genie spaces, UC functions, or vector search indexes, (4) User says 'discover', 'find resources', or 'what can I connect to'."
+metadata:
+  version: "0.0.1"
+---
+
+# Discover Available Tools
+
+**Run tool discovery BEFORE writing agent code** to understand what resources are available in the workspace.
+
+## Run Discovery
+
+```bash
+uv run discover-tools
+```
+
+**Options:**
+```bash
+# Limit to specific catalog/schema
+uv run discover-tools --catalog my_catalog --schema my_schema
+
+# Output as JSON
+uv run discover-tools --format json --output tools.json
+
+# Save markdown report
+uv run discover-tools --output tools.md
+
+# Use specific Databricks profile
+uv run discover-tools --profile DEFAULT
+```
+
+## What Gets Discovered
+
+| Resource Type | Description | MCP URL Pattern |
+|--------------|-------------|-----------------|
+| **UC Functions** | SQL UDFs as agent tools | `{host}/api/2.0/mcp/functions/{catalog}/{schema}` |
+| **UC Tables** | Structured data for querying | (via UC functions) |
+| **Vector Search Indexes** | RAG applications | `{host}/api/2.0/mcp/vector-search/{catalog}/{schema}` |
+| **Genie Spaces** | Natural language data interface | `{host}/api/2.0/mcp/genie/{space_id}` |
+| **Custom MCP Servers** | Apps starting with `mcp-*` | `{app_url}/mcp` |
+| **External MCP Servers** | Via UC connections | `{host}/api/2.0/mcp/external/{connection_name}` |
+
+## Next Steps
+
+After discovering tools:
+1. **Add MCP servers to your agent** - See **modify-agent** skill for SDK-specific code examples
+2. **Grant permissions** in `databricks.yml` - See **add-tools** skill for YAML snippets
+3. **Test locally** with `uv run start-app` - See **run-locally** skill
+
+Need a resource that doesn't exist yet? See the **create-tools** skill.
diff --git a/skills/discover-tools/agents/openai.yaml b/skills/discover-tools/agents/openai.yaml
new file mode 100644
index 0000000..7450d8e
--- /dev/null
+++ b/skills/discover-tools/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Discover Tools"
+  short_description: "Discover available workspace tools"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $discover-tools for discovering MCP servers, Genie spaces, UC functions, and vector search indexes in a Databricks workspace."
diff --git a/skills/discover-tools/assets/databricks.png b/skills/discover-tools/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/discover-tools/assets/databricks.png differ
diff --git a/skills/discover-tools/assets/databricks.svg b/skills/discover-tools/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/discover-tools/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/lakebase-setup/SKILL.md b/skills/lakebase-setup/SKILL.md
new file mode 100644
index 0000000..83fa097
--- /dev/null
+++ b/skills/lakebase-setup/SKILL.md
@@ -0,0 +1,469 @@
+---
+name: lakebase-setup
+description: "Configure Lakebase for agent memory storage. Use when: (1) Adding memory capabilities to the agent, (2) 'Failed to connect to Lakebase' errors, (3) Permission errors on checkpoint/store tables, (4) User says 'lakebase', 'memory setup', or 'add memory'."
+metadata:
+  version: "0.0.1"
+---
+
+# Lakebase Setup for Agent Persistence
+
+> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks <command> --profile <profile>` or `DATABRICKS_CONFIG_PROFILE=<profile> databricks <command>`
+
+> **Two types of Lakebase:** Databricks supports **provisioned** instances (with instance name) and **autoscaling** instances (project/branch model). This skill covers both. Make sure you know which type of Lakebase instance the user has; if it is unclear, ask them before proceeding.
+
+## Use Cases
+
+Lakebase is used for three distinct purposes across the agent templates:
+
+| Use case | Templates | Description |
+|----------|-----------|-------------|
+| **Chat UI conversation history** | All templates | The built-in chat UI (`e2e-chatbot-app-next`) can persist conversations across page refreshes and browser sessions. This is purely UI-side persistence — the agent itself is stateless. |
+| **Agent short-term memory** | `agent-langgraph-advanced`, `agent-openai-advanced` | Conversation threads within a session via `AsyncCheckpointSaver` (LangGraph) or `AsyncDatabricksSession` (OpenAI SDK). The agent remembers what was said earlier in the same conversation. |
+| **Agent long-term memory** | `agent-langgraph-advanced` | User facts across sessions via `AsyncDatabricksStore`. The agent remembers things about a user from previous conversations. |
+
+> **Note:** When the quickstart prompts for Lakebase on a non-memory template, it's for **chat UI history** only — not for the agent. Memory templates always require Lakebase.
+
+## Overview
+
+Lakebase provides persistent PostgreSQL storage for agents:
+- **Short-term memory** (LangGraph): Conversation history within a thread (`AsyncCheckpointSaver`)
+- **Long-term memory** (LangGraph): User facts across sessions (`AsyncDatabricksStore`)
+- **Short-term memory** (OpenAI SDK): Conversation history via `AsyncDatabricksSession`
+- **Long-running agent persistence** (OpenAI SDK): Background task state via custom SQLAlchemy tables (`agent_server` schema)
+
+> **Note:** For pre-configured memory templates, see:
+> - `agent-langgraph-advanced` - Short-term memory, long-term memory, and long-running background tasks (LangGraph)
+> - `agent-openai-advanced` - Short-term memory and long-running background tasks (OpenAI SDK)
+
+## Complete Setup Workflow
+
+```
+┌───────────────────────────────────────────────────────────────────────────┐
+│  1. Add dependency  →  2. Get instance  →  3. Configure DAB              │
+│  4. Configure .env  →  5. Deploy  →  6. Grant SP permissions  →  7. Run  │
+└───────────────────────────────────────────────────────────────────────────┘
+```
+
+> **Shortcut:** If using a pre-configured memory template, `uv run quickstart` with Lakebase flags handles steps 2-4 automatically. You still need to do steps 5-7 manually.
+
+---
+
+## Step 1: Add Memory Dependency
+
+Add the memory extra to your `pyproject.toml`:
+
+```toml
+dependencies = [
+    "databricks-langchain[memory]",
+    # ... other dependencies
+]
+```
+
+Then sync dependencies:
+```bash
+uv sync
+```
+
+---
+
+## Step 2: Create or Get Lakebase Instance
+
+### Option A: Provisioned Instance
+
+1. Go to your Databricks workspace
+2. Navigate to **Compute** → **Lakebase**
+3. Click **Create Instance** (or use an existing one)
+4. Note the **instance name**
+
+### Option B: Autoscaling Instance
+
+Autoscaling uses a **project/branch** model. You need three values:
+- **Project name** (e.g., `my-project`)
+- **Branch name** (e.g., `my-branch`)
+- **Database ID** (e.g., `db-xxxx-xxxxxxxxxx`)
+
+Find these via the postgres API:
+
+```bash
+# List projects
+databricks api get /api/2.0/postgres/projects --profile <profile>
+
+# List branches for a project
+databricks api get /api/2.0/postgres/projects/<project-name>/branches --profile <profile>
+
+# List databases for a branch
+databricks api get /api/2.0/postgres/projects/<project-name>/branches/<branch-name>/databases --profile <profile>
+```
+
+**Important:** The database ID is the internal ID (e.g., `db-xxxx-xxxxxxxxxx`), NOT `databricks_postgres`.
+
+---
+
+## Step 3: Configure databricks.yml (Lakebase Resource)
+
+> **Note:** If you ran `uv run quickstart` with Lakebase flags (`--lakebase-provisioned-name` or `--lakebase-autoscaling-project`/`--lakebase-autoscaling-branch`), the quickstart already configured `databricks.yml` for you — including fetching the database ID for autoscaling. Manual configuration is only needed if you didn't use quickstart or need to change values.
+
+### Option A: Provisioned
+
+Add the `database` resource to your app in `databricks.yml`:
+
+```yaml
+resources:
+  apps:
+    your_app:
+      name: "your-app-name"
+      source_code_path: ./
+      resources:
+        # ... other resources (experiment, UC functions, etc.) ...
+
+        # Lakebase instance for long-term memory
+        - name: 'database'
+          database:
+            instance_name: '<your-lakebase-instance-name>'
+            database_name: 'databricks_postgres'
+            permission: 'CAN_CONNECT_AND_CREATE'
+```
+
+**Important:**
+- The `instance_name: '<your-lakebase-instance-name>'` must match the actual Lakebase instance name
+- Using the `database` resource type automatically grants the app's service principal access to Lakebase
+See `.claude/skills/add-tools/examples/lakebase.yaml` for the YAML snippet.
+
+### Option B: Autoscaling
+
+Add the `postgres` resource to your app in `databricks.yml`:
+
+```yaml
+resources:
+  apps:
+    your_app:
+      name: "your-app-name"
+      source_code_path: ./
+      resources:
+        # ... other resources (experiment, UC functions, etc.) ...
+
+        # Autoscaling Lakebase instance for long-term memory
+        - name: 'postgres'
+          postgres:
+            branch: "projects/<project-name>/branches/<branch-name>"
+            database: "projects/<project-name>/branches/<branch-name>/databases/<database-id>"
+            permission: 'CAN_CONNECT_AND_CREATE'
+```
+
+**Important:** The `branch` and `database` fields use full resource path format.
+
+See `.claude/skills/add-tools/examples/lakebase-autoscaling.yaml` for the YAML snippet.
+
+### Add Environment Variables to databricks.yml config block
+
+**Provisioned:**
+```yaml
+      config:
+        env:
+          # Lakebase instance name - resolved from database resource at deploy time
+          - name: LAKEBASE_INSTANCE_NAME
+            value_from: "database"
+          # Static values for embedding configuration
+          - name: EMBEDDING_ENDPOINT
+            value: "databricks-gte-large-en"
+          - name: EMBEDDING_DIMS
+            value: "1024"
+```
+
+**Autoscaling:**
+```yaml
+      config:
+        env:
+          # Autoscaling Lakebase config
+          - name: LAKEBASE_AUTOSCALING_PROJECT
+            value: "<your-project-name>"
+          - name: LAKEBASE_AUTOSCALING_BRANCH
+            value: "<your-branch-name>"
+          # Static values for embedding configuration
+          - name: EMBEDDING_ENDPOINT
+            value: "databricks-gte-large-en"
+          - name: EMBEDDING_DIMS
+            value: "1024"
+```
+
+---
+
+## Step 4: Configure .env (Local Development)
+
+For local development, add to `.env`:
+
+**Provisioned:**
+```bash
+LAKEBASE_INSTANCE_NAME=<your-instance-name>
+EMBEDDING_ENDPOINT=databricks-gte-large-en
+EMBEDDING_DIMS=1024
+```
+
+**Autoscaling:**
+```bash
+LAKEBASE_AUTOSCALING_PROJECT=<your-project-name>
+LAKEBASE_AUTOSCALING_BRANCH=<your-branch-name>
+EMBEDDING_ENDPOINT=databricks-gte-large-en
+EMBEDDING_DIMS=1024
+```
+
+**Important:** `EMBEDDING_DIMS` must match the output dimension of the embedding endpoint set in `EMBEDDING_ENDPOINT`:
+
+| Endpoint | Dimensions |
+|----------|------------|
+| `databricks-gte-large-en` | 1024 |
+| `databricks-bge-large-en` | 1024 |
+
+> **Note:** `.env` is only for local development. When deployed, the app gets values from `databricks.yml` config env.
+
+---
+
+
+## Step 5: Deploy
+
+Deploy the app so the service principal and resources are created:
+
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle deploy
+```
+
+---
+
+## Step 6: Grant SP Permissions (CRITICAL)
+
+> **WARNING:** You MUST complete this step before running the app. Without it, the app will fail with database migration errors like `CREATE TABLE IF NOT EXISTS "drizzle"."__drizzle_migrations"` — permission denied.
+
+After deploying, the app's service principal needs Postgres roles to access Lakebase tables. The DAB resource grants basic connectivity, but you must also grant Postgres-level schema and table permissions.
+
+**Step 1:** Get the app's service principal client ID:
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq -r '.service_principal_client_id'
+```
+
+**Step 2:** Grant permissions using the grant script:
+
+```bash
+# Provisioned:
+DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \
+  --memory-type <type> --instance-name <name>
+
+# Autoscaling (endpoint — reads LAKEBASE_AUTOSCALING_ENDPOINT from .env by default):
+DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \
+  --memory-type <type> --autoscaling-endpoint <endpoint>
+
+# Autoscaling (project + branch):
+DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \
+  --memory-type <type> --project <project> --branch <branch>
+```
+
+**Memory type by template:**
+
+| Template | `--memory-type` value |
+|----------|-----------------------|
+| `agent-langgraph-advanced` | `langgraph` |
+| `agent-openai-advanced` | `openai` |
+
+The script handles fresh branches gracefully (warns but doesn't fail if tables don't exist yet — they'll be created on first app startup).
+
+---
+
+## Step 7: Run Your App
+
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle run {{BUNDLE_NAME}}
+```
+
+> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is required to actually start the app with the new code.
+
+---
+
+## Complete Examples: databricks.yml with Lakebase
+
+### Provisioned Lakebase
+
+```yaml
+bundle:
+  name: agent_langgraph
+
+resources:
+  apps:
+    agent_langgraph:
+      name: "my-agent-app"
+      description: "Agent with long-term memory"
+      source_code_path: ./
+      config:
+        command: ["uv", "run", "start-app"]
+        env:
+          - name: MLFLOW_TRACKING_URI
+            value: "databricks"
+          - name: MLFLOW_REGISTRY_URI
+            value: "databricks-uc"
+          - name: API_PROXY
+            value: "http://localhost:8000/invocations"
+          - name: CHAT_APP_PORT
+            value: "3000"
+          - name: CHAT_PROXY_TIMEOUT_SECONDS
+            value: "300"
+          - name: MLFLOW_EXPERIMENT_ID
+            value_from: "experiment"
+          # Lakebase instance name (resolved from database resource)
+          - name: LAKEBASE_INSTANCE_NAME
+            value_from: "database"
+          # Static values for embedding configuration
+          - name: EMBEDDING_ENDPOINT
+            value: "databricks-gte-large-en"
+          - name: EMBEDDING_DIMS
+            value: "1024"
+
+      resources:
+        - name: 'experiment'
+          experiment:
+            experiment_id: ""
+            permission: 'CAN_MANAGE'
+        - name: 'database'
+          database:
+            instance_name: '<your-lakebase-instance-name>'
+            database_name: 'databricks_postgres'
+            permission: 'CAN_CONNECT_AND_CREATE'
+
+targets:
+  dev:
+    mode: development
+    default: true
+```
+
+### Autoscaling Lakebase
+
+```yaml
+bundle:
+  name: agent_langgraph
+
+resources:
+  apps:
+    agent_langgraph:
+      name: "my-agent-app"
+      description: "Agent with long-term memory"
+      source_code_path: ./
+      config:
+        command: ["uv", "run", "start-app"]
+        env:
+          - name: MLFLOW_TRACKING_URI
+            value: "databricks"
+          - name: MLFLOW_REGISTRY_URI
+            value: "databricks-uc"
+          - name: API_PROXY
+            value: "http://localhost:8000/invocations"
+          - name: CHAT_APP_PORT
+            value: "3000"
+          - name: CHAT_PROXY_TIMEOUT_SECONDS
+            value: "300"
+          - name: MLFLOW_EXPERIMENT_ID
+            value_from: "experiment"
+          # Autoscaling Lakebase config
+          - name: LAKEBASE_AUTOSCALING_PROJECT
+            value: "<your-project-name>"
+          - name: LAKEBASE_AUTOSCALING_BRANCH
+            value: "<your-branch-name>"
+          # Static values for embedding configuration
+          - name: EMBEDDING_ENDPOINT
+            value: "databricks-gte-large-en"
+          - name: EMBEDDING_DIMS
+            value: "1024"
+
+      resources:
+        - name: 'experiment'
+          experiment:
+            experiment_id: ""
+            permission: 'CAN_MANAGE'
+        - name: 'postgres'
+          postgres:
+            branch: "projects/<your-project-name>/branches/<your-branch-name>"
+            database: "projects/<your-project-name>/branches/<your-branch-name>/databases/<your-database-id>"
+            permission: 'CAN_CONNECT_AND_CREATE'
+
+targets:
+  dev:
+    mode: development
+    default: true
+```
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| **"embedding_dims is required when embedding_endpoint is specified"** | Missing `embedding_dims` parameter | Add `embedding_dims=1024` to AsyncDatabricksStore |
+| **"relation 'store' does not exist"** | Tables not initialized | The app creates tables on first use; ensure SP has CREATE permission |
+| **"Unable to resolve Lakebase instance 'None'"** | Missing env var in deployed app | Add `LAKEBASE_INSTANCE_NAME` to databricks.yml `config.env` |
+| **"permission denied for table store"** | Missing grants | Run `uv run python scripts/grant_lakebase_permissions.py <sp-client-id>` with the `--memory-type` and instance/project flags shown in Step 6 to grant permissions |
+| **"Failed to connect to Lakebase"** | Wrong instance name or project/branch | Verify values in databricks.yml and .env |
+| **Connection pool errors on exit** | Python cleanup race | Ignore `PythonFinalizationError` - it's harmless |
+| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run <app>` after deploy |
+| **value_from not resolving** | Resource name mismatch | Ensure `value_from` value matches `name` in databricks.yml resources |
+| **"Invalid postgres resource parameters"** | Missing `database` field in postgres resource | Add full `database` path: `projects/<project>/branches/<branch>/databases/<db-id>` |
+| **`CREATE TABLE IF NOT EXISTS "drizzle"."__drizzle_migrations"` fails** | Grant step was skipped — SP lacks Postgres permissions | Run `grant_lakebase_permissions.py` with `--memory-type`, then restart the app |
+
+---
+
+## LakebaseClient API (for reference)
+
+```python
+from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege
+
+# Provisioned:
+client = LakebaseClient(instance_name="...")
+# Autoscaling:
+client = LakebaseClient(project="...", branch="...")
+
+# Create role (must do first)
+client.create_role(identity_name, "SERVICE_PRINCIPAL")
+
+# Grant schema (note: schemas is a list, grantee not role)
+client.grant_schema(
+    grantee="...",
+    schemas=["public"],
+    privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE],
+)
+
+# Grant tables (note: tables includes schema prefix)
+client.grant_table(
+    grantee="...",
+    tables=["public.store"],
+    privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, ...],
+)
+
+# Execute raw SQL
+client.execute("SELECT * FROM pg_tables WHERE schemaname = 'public'")
+```
+
+### Service Principal Identifiers
+
+When granting permissions manually, note that Databricks apps have multiple identifiers:
+
+| Field | Format | Example |
+|-------|--------|---------|
+| `service_principal_id` | Numeric ID | `1234567890123456` |
+| `service_principal_client_id` | UUID | `a1b2c3d4-e5f6-7890-abcd-ef1234567890` |
+| `service_principal_name` | String name | `my-app-service-principal` |
+
+**Get all identifiers:**
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq '{
+  id: .service_principal_id,
+  client_id: .service_principal_client_id,
+  name: .service_principal_name
+}'
+```
+
+**Which to use:**
+- `LakebaseClient.create_role()` - Use `service_principal_client_id` (UUID) or `service_principal_name`
+- Raw SQL grants - Use `service_principal_client_id` (UUID)
+
+---
+
+## Next Steps
+
+- Add memory to agent code: see the **agent-langgraph-memory** (LangGraph) or **agent-openai-memory** (OpenAI Agents SDK) skill
+- Test locally: see **run-locally** skill
+- Deploy: see **deploy** skill
diff --git a/skills/lakebase-setup/agents/openai.yaml b/skills/lakebase-setup/agents/openai.yaml
new file mode 100644
index 0000000..e38c752
--- /dev/null
+++ b/skills/lakebase-setup/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Lakebase Setup (Agent Memory)"
+  short_description: "Configure Lakebase for agent memory"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $lakebase-setup for configuring Lakebase as storage for agent memory."
diff --git a/skills/lakebase-setup/assets/databricks.png b/skills/lakebase-setup/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/lakebase-setup/assets/databricks.png differ
diff --git a/skills/lakebase-setup/assets/databricks.svg b/skills/lakebase-setup/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/lakebase-setup/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/load-testing/SKILL.md b/skills/load-testing/SKILL.md
new file mode 100644
index 0000000..f25d7de
--- /dev/null
+++ b/skills/load-testing/SKILL.md
@@ -0,0 +1,321 @@
+---
+name: load-testing
+description: "Load test a Databricks App to find its maximum QPS. Use when: (1) User says 'load test', 'benchmark', 'QPS', 'throughput', or 'performance test', (2) User wants to find how many queries per second their app can handle, (3) User wants to set up load testing scripts for their agent, (4) User wants to view load test results/dashboard."
+metadata:
+  version: "0.0.1"
+---
+
+# Load Testing Your Databricks App
+
+**Goal:** Find the maximum QPS (queries per second) your Databricks App can support.
+
+## Before You Start — Gather Parameters
+
+Before beginning, use the `AskUserQuestion` tool to collect the following from the user:
+
+1. **Do they already have deployed apps to test, or do they need to set up new apps?**
+2. **Do they want to mock LLM calls?** Mocking isolates infrastructure throughput from LLM latency — useful for capacity planning. Testing without mocks measures end-to-end performance.
+3. **What compute sizes do they want to test?** (Medium, Large, or both)
+4. **How many worker configurations do they want to test?** (e.g., 2, 4, 6, 8 workers)
+5. **Do they have M2M OAuth credentials (service principal client_id/client_secret)?** — Recommended for tests longer than ~30 minutes. If not, guide them to create one.
+6. **What is their `DATABRICKS_HOST`?** (workspace URL)
+
+---
+
+## Step 1: Set Up Load Testing Scripts
+
+Create a `load-test-scripts/` directory in the project with the following files. These scripts are framework-agnostic and work with any Databricks App.
+
+### Directory Structure
+
+```
+<project-root>/
+  agent_server/                # Existing agent code
+  load-test-scripts/           # Load testing scripts (create this)
+    run_load_test.py           #   Main CLI — orchestrates Locust tests
+    locustfile.py              #   Locust test definition (SSE streaming, TTFT tracking)
+    dashboard_template.py      #   Generates interactive HTML dashboard from results
+    .env.example               #   Template for env vars
+  load-test-runs/              # Test results (auto-created per run)
+    <run-name>/
+      dashboard.html           #   Interactive dashboard
+      test_config.json         #   Test parameters
+      <label>/                 #   Per-config Locust CSV results
+```
+
+### Required Files
+
+**`locustfile.py`** — Locust load test that:
+- Sends `POST /invocations` with `{"input": [...], "stream": true}` to the app
+- Parses SSE stream (`data: {json}` lines) and counts chunks until `data: [DONE]`
+- Tracks **TTFT** (time to first `data:` line) as a custom Locust metric
+- Uses M2M OAuth token exchange (`client_credentials` grant to `{host}/oidc/v1/token`) with auto-refresh
+- Implements `StepRampShape` — ramps users from `step_size` to `max_users`, holding each level for `step_duration` seconds
+
+**`run_load_test.py`** — CLI orchestrator that:
+- Accepts `--app-url` (repeatable), `--client-id`, `--client-secret`, `--max-users`, `--step-size`, `--step-duration`, `--spawn-rate`, `--run-name`, `--dashboard`, `--compute-size`, `--label` flags
+- Tests each app URL sequentially (isolated metrics per config)
+- Refreshes OAuth token before each app
+- Runs healthcheck + warmup before each test
+- Saves results to `load-test-runs/<run-name>/<label>/`
+- Generates dashboard at the end if `--dashboard` is passed
+
+**`dashboard_template.py`** — Generates a self-contained HTML dashboard with Chart.js:
+- KPI cards (best config, peak QPS, lowest latency, total requests)
+- Bar charts: QPS by config (median + peak), latency (p50 + p95), TTFT, total requests
+- QPS Ramp Progression: line charts with QPS/Latency/Failures tabs and a max-users slider
+- Grouped by compute size (medium/large side-by-side)
+- Full results table with peak QPS, users at peak, latency percentiles, failure rate
+- Can be run standalone: `uv run dashboard_template.py ../load-test-runs/<run-name>/`
+
+### Install Dependencies
+
+The load testing scripts use their own `pyproject.toml` inside `load-test-scripts/` to avoid polluting the agent's production dependencies.
+
+```toml
+# load-test-scripts/pyproject.toml
+[project]
+name = "load-test-scripts"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = [
+    "locust>=2.32,<2.40",
+    "urllib3<2.3",
+    "requests",
+]
+```
+
+Then install from within the `load-test-scripts/` directory:
+```bash
+cd load-test-scripts/
+uv sync
+```
+
+> **Note:** `locust>=2.43` has a known `RecursionError`. Pin to `<2.40` to avoid it.
+
+---
+
+## Step 2 (Optional): Mock Your Agent for Load Testing
+
+Mocking is **optional** — you can skip this step to test your real agent end-to-end (including LLM latency). However, mocking is useful for:
+
+- **Capacity planning** — isolating Apps infrastructure throughput from LLM latency (which adds 1-30s per request)
+- **Cost savings** — avoiding FMAPI/token usage during load tests
+- **Reproducibility** — getting consistent measurements independent of LLM response variability
+
+### How to Mock
+
+The mock timing is controlled by two environment variables (set in `app.yaml` or `databricks.yml`):
+- `MOCK_CHUNK_DELAY_MS` — delay between text chunks in milliseconds (default: `10`)
+- `MOCK_CHUNK_COUNT` — number of text chunks per response (default: `80`)
+
+**For OpenAI Agents SDK templates:** Create a `MockAsyncOpenAI` client that replaces `AsyncDatabricksOpenAI`. It simulates tool call streaming (instant) and text response streaming (delayed chunks). A reference implementation is available at [`examples/mock_openai_client.py`](examples/mock_openai_client.py):
+```python
+from agent_server.mock_openai_client import MockAsyncOpenAI
+set_default_openai_client(MockAsyncOpenAI())
+set_default_openai_api("chat_completions")
+```
+
+**For LangGraph templates:** Replace the `ChatDatabricks` model with a mock that returns pre-built `AIMessage` objects with tool calls and text content using configurable delays.
+
+**For custom agents:** Wrap whatever external API calls you make (LLM, vector search, etc.) with mock implementations that return realistic response shapes.
+
+---
+
+## Step 3: Deploy Load Testing Apps
+
+Deploy multiple Databricks Apps with varying compute sizes and worker counts.
+
+### Recommended Test Matrix
+
+| Compute Size | Workers | App Name |
+|-------------|---------|----------|
+| Medium | 2 | `<your-app>-medium-w2` |
+| Medium | 4 | `<your-app>-medium-w4` |
+| Medium | 6 | `<your-app>-medium-w6` |
+| Medium | 8 | `<your-app>-medium-w8` |
+| Large | 6 | `<your-app>-large-w6` |
+| Large | 8 | `<your-app>-large-w8` |
+| Large | 10 | `<your-app>-large-w10` |
+| Large | 12 | `<your-app>-large-w12` |
+
+### Configuring Compute Size
+
+**Databricks CLI:**
+```bash
+databricks apps create <app-name> --compute-size MEDIUM
+databricks apps update <app-name> --compute-size LARGE
+```
+
+**Databricks UI:** Go to **Compute** > **Apps** > your app > **Edit** > **Configure** > **Compute**.
+
+### Configuring Worker Count
+
+`start-server` (via `AgentServer.run()`) accepts a `--workers` flag directly. Pass the worker count in the `command` array using a DAB variable — no wrapper script needed:
+
+```yaml
+variables:
+  app_name:
+    default: "my-agent-medium-w2"
+  workers:
+    default: "2"
+
+resources:
+  apps:
+    load_test_app:
+      name: ${var.app_name}
+      source_code_path: .
+      config:
+        command: ["uv", "run", "start-server", "--workers", "${var.workers}"]
+        env:
+          - name: MOCK_CHUNK_DELAY_MS
+            value: "10"
+          - name: MOCK_CHUNK_COUNT
+            value: "80"
+
+targets:
+  medium-w2:
+    default: true
+    variables:
+      app_name: "my-agent-medium-w2"
+      workers: "2"
+  large-w8:
+    variables:
+      app_name: "my-agent-large-w8"
+      workers: "8"
+```
+
+### Deploying
+
+```bash
+databricks bundle deploy --target medium-w2
+databricks bundle run load_test_app --target medium-w2
+```
+
+Verify apps are ACTIVE before proceeding:
+```bash
+databricks apps get <app-name> --output json | jq '{app_status, compute_status, url}'
+```
+
+---
+
+## Step 4: Run Load Tests
+
+### Authentication — M2M OAuth (Required for Long Tests)
+
+Load tests can run for hours. **U2M OAuth tokens expire** and break your test mid-run. Use M2M (machine-to-machine) OAuth with a service principal instead.
+
+```bash
+export DATABRICKS_HOST=https://your-workspace.cloud.databricks.com
+export DATABRICKS_CLIENT_ID=<your-client-id>
+export DATABRICKS_CLIENT_SECRET=<your-client-secret>
+```
+
+### Parameters Reference
+
+| Parameter | Required | Default | Description |
+|-----------|----------|---------|-------------|
+| `--app-url` | Yes | — | App URL(s) to test (repeatable) |
+| `--client-id` | Recommended | `DATABRICKS_CLIENT_ID` env | Service principal client ID |
+| `--client-secret` | Recommended | `DATABRICKS_CLIENT_SECRET` env | Service principal client secret |
+| `--label` | No | Auto-derived from URL | Human-readable label per app (repeatable) |
+| `--compute-size` | No | Auto-detected or `medium` | Compute size tag per app: `medium`, `large` (repeatable) |
+| `--max-users` | No | `300` | Maximum concurrent simulated users |
+| `--step-size` | No | `20` | Users added per ramp step |
+| `--step-duration` | No | `30` | Seconds per ramp step |
+| `--spawn-rate` | No | `20` | User spawn rate (users/sec) |
+| `--run-name` | No | `<timestamp>` | Name for this run — results saved to `load-test-runs/<run-name>/` |
+| `--dashboard` | No | Off | Generate interactive HTML dashboard after tests |
+
+### Example Commands
+
+```bash
+cd load-test-scripts/
+
+# Quick single-app test:
+uv run run_load_test.py \
+    --app-url https://my-app.aws.databricksapps.com \
+    --client-id <ID> --client-secret <SECRET> \
+    --dashboard --run-name quick-test
+
+# Matrix sweep, overnight — 4 of the 8 matrix apps shown; repeat --app-url/--compute-size for the rest:
+uv run run_load_test.py \
+    --app-url https://my-app-medium-w2.aws.databricksapps.com \
+    --app-url https://my-app-medium-w4.aws.databricksapps.com \
+    --app-url https://my-app-large-w8.aws.databricksapps.com \
+    --app-url https://my-app-large-w10.aws.databricksapps.com \
+    --compute-size medium --compute-size medium \
+    --compute-size large --compute-size large \
+    --max-users 1000 --step-size 20 --step-duration 10 \
+    --dashboard --run-name overnight-sweep
+
+# Multiple runs for statistical consistency:
+for RUN in r1 r2 r3 r4 r5; do
+  uv run run_load_test.py \
+      --app-url ... \
+      --client-id <ID> --client-secret <SECRET> \
+      --max-users 1000 --step-size 20 --step-duration 10 \
+      --run-name my_test_${RUN} --dashboard || break
+done
+```
+
+### What Happens During a Run
+
+1. **Healthcheck** — verifies the app streams correctly (receives `[DONE]`)
+2. **Warmup** — sends sequential requests to warm up the app
+3. **Ramp-to-saturation** — steps up concurrent users every `step_duration` seconds
+4. **When QPS plateaus** despite adding users, you've found the saturation point
+
+### Estimated Duration
+
+- `(max_users / step_size) * step_duration` seconds per app
+- With defaults: `(300 / 20) * 30 = 15 steps * 30s = ~7.5 min` per app
+- For 4 apps: ~30 min per run
+
+---
+
+## Step 5: View Results Dashboard
+
+### Opening the Dashboard
+
+```bash
+open load-test-runs/<run-name>/dashboard.html
+```
+
+### Regenerating the Dashboard
+
+```bash
+cd load-test-scripts/
+uv run dashboard_template.py ../load-test-runs/<run-name>/
+```
+
+### What the Dashboard Shows
+
+- **KPI Cards** — Best config (peak QPS), overall peak QPS, lowest latency, total requests
+- **QPS by Config** — Grouped bars showing median QPS and peak QPS side-by-side
+- **Latency by Config** — Grouped bars showing p50 and p95 latency
+- **TTFT by Config** — Time to first token (p50 and p95)
+- **Total Requests Served** — How many requests each config handled
+- **QPS Ramp Progression** — Line charts with tabs for QPS, QPS (excl. failures), Latency, and Failures. Includes a **max-users slider** to zoom into lower concurrency ranges. Charts are grouped by compute size (medium/large).
+- **Full Results Table** — All configs with peak QPS, users at peak, latency percentiles, and failure rate
+- **Load Test Parameters** — Summary of test configuration for reproducibility
+
+### Interpreting Results
+
+- **Peak QPS** — Maximum QPS at any ramp step. This is the throughput ceiling.
+- **Users at Peak** — Concurrent users when peak QPS was achieved. Adding more users beyond this point does not increase throughput.
+- **Failure Rate** — Should be 0% or very low. High rates mean the app is overloaded.
+- **QPS Ramp Chart** — Look for where the line flattens. That's the saturation point.
+
+---
+
+## Troubleshooting
+
+| Issue | Solution |
+|-------|----------|
+| Auth token expired mid-test | Use M2M OAuth (`--client-id`/`--client-secret`) instead of static tokens |
+| Healthcheck fails | Verify app is ACTIVE: `databricks apps get <name> --output json` |
+| 0 QPS / no results | Check `load-test-runs/<run-name>/<label>/locust_output.log` for errors |
+| Low QPS despite high user count | App is saturated — try more workers or larger compute |
+| High failure rate | App is overloaded — reduce `--max-users` or increase workers/compute |
+| Dashboard shows no ramp data | Ensure `results_stats_history.csv` exists in each result subdir |
diff --git a/skills/load-testing/agents/openai.yaml b/skills/load-testing/agents/openai.yaml
new file mode 100644
index 0000000..747822e
--- /dev/null
+++ b/skills/load-testing/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Load Testing"
+  short_description: "Load test a Databricks App"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $load-testing for load testing a Databricks App to find max QPS."
diff --git a/skills/load-testing/assets/databricks.png b/skills/load-testing/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/load-testing/assets/databricks.png differ
diff --git a/skills/load-testing/assets/databricks.svg b/skills/load-testing/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/load-testing/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/long-running-server/SKILL.md b/skills/long-running-server/SKILL.md
new file mode 100644
index 0000000..d34fb7a
--- /dev/null
+++ b/skills/long-running-server/SKILL.md
@@ -0,0 +1,345 @@
+---
+name: long-running-server
+description: "Enable long-running background task support with LongRunningAgentServer. Use when: (1) Agent tasks may exceed HTTP timeout (~120s), (2) User wants background/async execution, (3) User says 'long running', 'background tasks', or 'async agent'."
+metadata:
+  version: "0.0.1"
+---
+
+# Enable Long-Running Agent Server
+
+> **Prerequisite:** Lakebase must be configured. If not already set up, follow the **lakebase-setup** skill first.
+
+Upgrades from `AgentServer` to `LongRunningAgentServer`, enabling background task execution that survives HTTP timeouts. Long-running tasks are persisted to Lakebase PostgreSQL so clients can poll or stream results.
+
+## What It Enables
+
+| Request pattern | Description |
+|---|---|
+| **Standard** | `POST /responses` — blocks until complete (queries ≤ 120s) |
+| **Background + Poll** | `POST /responses { background: true }` → `GET /responses/{id}` |
+| **Background + Stream** | `POST /responses { background: true, stream: true }` with cursor-based resumption via `starting_after` |
+
+---
+
+## Step 1: Add Dependency
+
+Add `databricks-ai-bridge[agent-server]` to `pyproject.toml`:
+
+```toml
+dependencies = [
+    # ... existing dependencies ...
+    "databricks-ai-bridge[agent-server]>=0.18.0",
+]
+```
+
+Run `uv sync` to install.
+
+---
+
+## Step 2: Update `start_server.py`
+
+Replace the basic `AgentServer` with `LongRunningAgentServer`. Key changes:
+
+1. Import `LongRunningAgentServer` instead of `AgentServer`
+2. Subclass it to override `transform_stream_event` (replaces placeholder IDs in streamed events)
+3. Pass Lakebase connection config and timeout settings
+4. Add a lifespan hook to initialize database tables at startup
+
+### OpenAI SDK
+
+```python
+"""Agent server entry point. load_dotenv must run before agent imports (auth config)."""
+
+# ruff: noqa: E402
+import os
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env", override=True)
+
+import logging
+
+from databricks_ai_bridge.long_running import LongRunningAgentServer
+from mlflow.genai.agent_server import setup_mlflow_git_based_version_tracking
+
+from agent_server.utils import lakebase_config, replace_fake_id
+
+import agent_server.agent  # noqa: F401
+
+logger = logging.getLogger(__name__)
+
+
+class AgentServer(LongRunningAgentServer):
+    """LongRunningAgentServer subclass that rewrites placeholder IDs in streamed events."""
+
+    def transform_stream_event(self, event, response_id):
+        # Delegate to replace_fake_id (imported from agent_server.utils), passing this response's ID.
+        return replace_fake_id(event, response_id)
+
+
+agent_server = AgentServer(
+    "ResponsesAgent",
+    enable_chat_proxy=True,
+    db_instance_name=lakebase_config.instance_name,
+    db_autoscaling_endpoint=lakebase_config.autoscaling_endpoint,
+    db_project=lakebase_config.autoscaling_project,
+    db_branch=lakebase_config.autoscaling_branch,
+    task_timeout_seconds=float(os.getenv("TASK_TIMEOUT_SECONDS", "3600")),
+    poll_interval_seconds=float(os.getenv("POLL_INTERVAL_SECONDS", "1.0")),
+)
+
+log_level = os.getenv("LOG_LEVEL", "INFO")
+logging.getLogger("agent_server").setLevel(getattr(logging, log_level.upper(), logging.INFO))
+
+_original_lifespan = agent_server.app.router.lifespan_context
+
+
+@asynccontextmanager
+async def _lifespan(app):
+    """Lifespan wrapper that runs the server's original lifespan context around the app."""
+    # Initialize session/long-running tables at startup.
+    # If using AsyncDatabricksSession, create a throwaway session and call _ensure_tables().
+    # NOTE(review): as written, no initialization code runs here; this is the hook point
+    # where such code would be added before entering the original lifespan.
+    async with _original_lifespan(app):
+        yield
+
+
+agent_server.app.router.lifespan_context = _lifespan
+
+app = agent_server.app  # noqa: F841
+setup_mlflow_git_based_version_tracking()
+
+
+def main():
+    agent_server.run(app_import_string="agent_server.start_server:app")
+```
+
+### LangGraph
+
+```python
+"""Agent server entry point. load_dotenv must run before agent imports (auth config)."""
+
+# ruff: noqa: E402
+import os
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path=Path(__file__).parent.parent / ".env", override=True)
+
+import logging
+
+from databricks_ai_bridge.long_running import LongRunningAgentServer
+from mlflow.genai.agent_server import setup_mlflow_git_based_version_tracking
+
+from agent_server.utils import replace_fake_id, LAKEBASE_CONFIG
+
+import agent_server.agent  # noqa: F401
+
+logger = logging.getLogger(__name__)
+
+
+class AgentServer(LongRunningAgentServer):
+    """LongRunningAgentServer subclass that rewrites placeholder IDs in streamed events."""
+
+    def transform_stream_event(self, event, response_id):
+        # Delegate to replace_fake_id (imported from agent_server.utils), passing this response's ID.
+        return replace_fake_id(event, response_id)
+
+
+agent_server = AgentServer(
+    "ResponsesAgent",
+    enable_chat_proxy=True,
+    db_instance_name=LAKEBASE_CONFIG.instance_name,
+    db_autoscaling_endpoint=LAKEBASE_CONFIG.autoscaling_endpoint,
+    db_project=LAKEBASE_CONFIG.autoscaling_project,
+    db_branch=LAKEBASE_CONFIG.autoscaling_branch,
+    task_timeout_seconds=float(os.getenv("TASK_TIMEOUT_SECONDS", "3600")),
+    poll_interval_seconds=float(os.getenv("POLL_INTERVAL_SECONDS", "1.0")),
+)
+
+app = agent_server.app  # noqa: F841
+setup_mlflow_git_based_version_tracking()
+
+_original_lifespan = app.router.lifespan_context
+
+
+@asynccontextmanager
+async def _lifespan(app):
+    # Initialize Lakebase tables at startup (e.g. run_lakebase_setup)
+    try:
+        async with _original_lifespan(app):
+            yield
+    except Exception as exc:
+        logger.warning("Long-running DB init failed: %s. Background mode disabled.", exc)
+        yield
+
+
+app.router.lifespan_context = _lifespan
+
+
+def main():
+    agent_server.run(app_import_string="agent_server.start_server:app")
+```
+
+---
+
+## Step 3: Add `replace_fake_id` Utility
+
+Add to `utils.py` if not already present. The implementation differs by SDK:
+
+### OpenAI SDK
+
+```python
+try:
+    from agents.models.fake_id import FAKE_RESPONSES_ID
+except ImportError:
+    FAKE_RESPONSES_ID = "__fake_id__"
+
+
+def replace_fake_id(obj, real_id: str):
+    """Recursively replace FAKE_RESPONSES_ID with real_id."""
+    if isinstance(obj, dict):
+        return {k: replace_fake_id(v, real_id) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [replace_fake_id(item, real_id) for item in obj]
+    elif isinstance(obj, str) and obj == FAKE_RESPONSES_ID:
+        return real_id
+    return obj
+```
+
+### LangGraph
+
+```python
+_FAKE_ID_PREFIX = "resp_placeholder_"
+
+
+def replace_fake_id(obj, real_id: str):
+    """Recursively replace any resp_placeholder_* ID with real_id."""
+    if isinstance(obj, dict):
+        return {k: replace_fake_id(v, real_id) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [replace_fake_id(item, real_id) for item in obj]
+    elif isinstance(obj, str) and obj.startswith(_FAKE_ID_PREFIX):
+        return real_id
+    return obj
+```
+
+---
+
+## Step 4: Add Lakebase Config
+
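+Add to `utils.py` if not already present. This reads Lakebase connection parameters from environment variables and exposes them as the module-level `lakebase_config` singleton. The LangGraph example in Step 2 imports the same object as `LAKEBASE_CONFIG`, so either name the singleton accordingly or adjust that import: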
+
+```python
+import os
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class LakebaseConfig:
+    """Immutable Lakebase connection settings.
+
+    init_lakebase_config() fills only the fields of the selected connection
+    mode and sets the rest to None.
+    """
+
+    # Provisioned instance name (from LAKEBASE_INSTANCE_NAME).
+    instance_name: Optional[str]
+    # Autoscaling endpoint (from LAKEBASE_AUTOSCALING_ENDPOINT).
+    autoscaling_endpoint: Optional[str]
+    # Autoscaling project (from LAKEBASE_AUTOSCALING_PROJECT); used together with the branch.
+    autoscaling_project: Optional[str]
+    # Autoscaling branch (from LAKEBASE_AUTOSCALING_BRANCH); used together with the project.
+    autoscaling_branch: Optional[str]
+
+
+def init_lakebase_config() -> LakebaseConfig:
+    """Read lakebase env vars. Priority: endpoint > project+branch > instance_name."""
+    endpoint = os.getenv("LAKEBASE_AUTOSCALING_ENDPOINT") or None
+    raw_name = os.getenv("LAKEBASE_INSTANCE_NAME") or None
+    project = os.getenv("LAKEBASE_AUTOSCALING_PROJECT") or None
+    branch = os.getenv("LAKEBASE_AUTOSCALING_BRANCH") or None
+
+    has_autoscaling = project and branch
+    if not endpoint and not raw_name and not has_autoscaling:
+        raise ValueError(
+            "Lakebase configuration is required. Set one of:\n"
+            "  LAKEBASE_AUTOSCALING_ENDPOINT=<endpoint>\n"
+            "  LAKEBASE_AUTOSCALING_PROJECT + LAKEBASE_AUTOSCALING_BRANCH\n"
+            "  LAKEBASE_INSTANCE_NAME=<instance-name>\n"
+        )
+
+    if endpoint:
+        return LakebaseConfig(instance_name=None, autoscaling_endpoint=endpoint,
+                              autoscaling_project=None, autoscaling_branch=None)
+    elif has_autoscaling:
+        return LakebaseConfig(instance_name=None, autoscaling_endpoint=None,
+                              autoscaling_project=project, autoscaling_branch=branch)
+    else:
+        return LakebaseConfig(instance_name=raw_name, autoscaling_endpoint=None,
+                              autoscaling_project=None, autoscaling_branch=None)
+
+
+# Module-level singleton
+lakebase_config = init_lakebase_config()
+```
+
+---
+
+## Step 5: Configure `databricks.yml`
+
+Add Lakebase resource and env vars per the **lakebase-setup** skill. The long-running server additionally uses these optional env vars:
+
+```yaml
+config:
+  env:
+    # ... existing env vars ...
+    - name: TASK_TIMEOUT_SECONDS
+      value: "3600"
+    - name: POLL_INTERVAL_SECONDS
+      value: "1.0"
+    - name: LOG_LEVEL
+      value: "INFO"
+```
+
+---
+
+## Step 6: Configure `.env` for Local Development
+
+Add Lakebase connection vars (see **lakebase-setup** skill for all options):
+
+```bash
+# Pick ONE mode:
+# Option 1: Autoscaling endpoint
+LAKEBASE_AUTOSCALING_ENDPOINT=<your-endpoint>
+# Option 2: Autoscaling project/branch
+LAKEBASE_AUTOSCALING_PROJECT=<project>
+LAKEBASE_AUTOSCALING_BRANCH=<branch>
+# Option 3: Provisioned instance
+LAKEBASE_INSTANCE_NAME=<instance-name>
+
+# Optional tuning
+TASK_TIMEOUT_SECONDS=3600
+POLL_INTERVAL_SECONDS=1.0
+LOG_LEVEL=INFO
+```
+
+---
+
+## Step 7: Deploy and Grant Permissions
+
+Follow the **lakebase-setup** skill Steps 5-7 to deploy, grant SP permissions, and run the app.
+
+---
+
+## Constructor Reference
+
+| Parameter | Type | Default | Description |
+|---|---|---|---|
+| `name` | `str` | required | Server name (e.g. `"ResponsesAgent"`) |
+| `enable_chat_proxy` | `bool` | `False` | Enable chat UI proxy endpoint |
+| `db_instance_name` | `str \| None` | `None` | Provisioned Lakebase instance name |
+| `db_autoscaling_endpoint` | `str \| None` | `None` | Autoscaling endpoint hostname |
+| `db_project` | `str \| None` | `None` | Autoscaling project name |
+| `db_branch` | `str \| None` | `None` | Autoscaling branch name |
+| `task_timeout_seconds` | `float` | `3600` | Max background task time before timeout |
+| `poll_interval_seconds` | `float` | `1.0` | Stream event poll interval |
+
+---
+
+## Troubleshooting
+
+| Issue | Cause | Solution |
+|---|---|---|
+| `ImportError: cannot import LongRunningAgentServer` | Missing dependency | Add `databricks-ai-bridge[agent-server]>=0.18.0` and `uv sync` |
+| `background=true` returns but no result | Lakebase not configured | Set Lakebase env vars in `.env` / `databricks.yml` |
+| Task times out | Long agent execution | Increase `TASK_TIMEOUT_SECONDS` |
+| Stream events have placeholder IDs | Missing `transform_stream_event` | Ensure `AgentServer` subclass overrides it |
+| DB initialization failed warning | Lakebase connection error | Check env vars and permissions (see **lakebase-setup** skill) |
diff --git a/skills/long-running-server/agents/openai.yaml b/skills/long-running-server/agents/openai.yaml
new file mode 100644
index 0000000..d3abeaa
--- /dev/null
+++ b/skills/long-running-server/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Long-Running Server"
+  short_description: "Background task support for agents"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $long-running-server for enabling long-running background tasks in an agent server."
diff --git a/skills/long-running-server/assets/databricks.png b/skills/long-running-server/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/long-running-server/assets/databricks.png differ
diff --git a/skills/long-running-server/assets/databricks.svg b/skills/long-running-server/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/long-running-server/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/migrate-from-model-serving/SKILL.md b/skills/migrate-from-model-serving/SKILL.md
new file mode 100644
index 0000000..b8e05a0
--- /dev/null
+++ b/skills/migrate-from-model-serving/SKILL.md
@@ -0,0 +1,967 @@
+---
+name: migrate-from-model-serving
+description: "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps. Use when: (1) User wants to migrate from Model Serving to Apps, (2) User has a ResponsesAgent with predict()/predict_stream() methods, (3) User wants to convert to @invoke/@stream decorators."
+metadata:
+  version: "0.0.1"
+---
+
+# Model Serving to Databricks Apps Migration Guide
+
+This guide instructs LLM coding agents how to migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps.
+
+---
+
+## Overview
+
+**Goal:** Migrate an agent deployed on Databricks Model Serving (using `ResponsesAgent` with `predict()`/`predict_stream()`) to Databricks Apps (using MLflow GenAI Server with `@invoke`/`@stream` decorators).
+
+**Key Transformation:**
+- Model Serving: Synchronous `predict()` and `predict_stream()` methods on a class
+- Apps: Functions with `@invoke` and `@stream` decorators (sync or async, based on user preference)
+
+**Deliverables:** After migration is complete, you will have:
+
+```
+<working-directory>/
+├── original_mlflow_model/    # Downloaded artifacts from Model Serving
+│   ├── MLmodel
+│   ├── code/
+│   │   └── agent.py
+│   ├── input_example.json
+│   └── requirements.txt
+│
+└── <app-name>/               # New Databricks App (ready to deploy)
+    ├── agent_server/
+    │   ├── agent.py          # Migrated agent code
+    │   └── ...
+    ├── databricks.yml        # Bundle config with resources
+    ├── pyproject.toml
+    ├── requirements.txt
+    └── ...
+```
+
+> **`<app-name>`** is the name the user provides at the start of the migration. It is used as both the directory name and the Databricks App name at deploy time.
+
+---
+
+## Before You Begin: Gather User Inputs
+
+**Before doing anything else, ask the user three questions.** Use the `AskUserQuestion` tool to collect all answers at once so the user is only prompted once; you can then execute the rest of the migration autonomously.
+
+**Questions to ask:**
+
+1. **Databricks profile:** Which Databricks CLI profile should be used for the workspace where the Model Serving endpoint lives? (Run `databricks auth profiles` first to list available profiles and their workspaces, then present the options to the user.)
+2. **App name:** What should the new Databricks App be named? (Must be lowercase, can contain letters, numbers, and hyphens, and must be unique within the workspace.)
+3. **Async migration:** Would you like to migrate your agent code to be fully async?
+   - **Yes (Recommended):** Converts all I/O operations to async (`await`/`async for`), enabling higher concurrency on smaller compute — no more threads sitting idle while waiting for LLM responses or long-running tool calls.
+   - **No:** Keeps your existing synchronous code with minimal changes — just extracts the logic from the `ResponsesAgent` class and wraps it with `@invoke`/`@stream` decorators. Simpler migration, but each request blocks a thread while waiting for I/O.
+
+Store the answers as:
+- `<profile>` — used for ALL `databricks` CLI commands throughout the migration (via `--profile <profile>`)
+- `<app-name>` — used as both the directory name for the migrated app AND the app name when deploying with `databricks bundle deploy`
+- `<async>` — `yes` or `no`, determines whether to convert the agent code to async or keep it synchronous
+
+### Validate Authentication
+
+After receiving the user's answers, validate the selected profile:
+
+```bash
+databricks current-user me --profile <profile>
+```
+
+If this fails with an authentication error, prompt the user to re-authenticate:
+
+```bash
+databricks auth login --profile <profile>
+```
+
+> **Important:** Remember to include `--profile <profile>` on every `databricks` CLI command throughout the migration.
+
+### Create the App Directory
+
+Copy all scaffold files from the current working directory into a new directory named `<app-name>/`. Exclude instruction files (`AGENTS.md`, `CLAUDE.md`), hidden directories (`.claude/`, `.git/`), and any migration artifacts (e.g., `original_mlflow_model/`, `.migration-venv/`). Do NOT search for or copy scaffold files from other directories or templates — everything you need is right here.
+
+All subsequent migration steps operate inside the `<app-name>/` directory.
+
+> **Note:** The `agent_server/agent.py` scaffold is intentionally framework-agnostic — it contains the `@invoke`/`@stream` decorator pattern with TODO placeholders. Step 3 (Migrate the Agent Code) will replace these placeholders with the actual agent logic from the original Model Serving endpoint.
+
+### Create Task List
+
+**Create a task list to track progress.** This helps the user follow along and see what's completed, in progress, and pending.
+
+> **User tip:** Press `Ctrl+T` to toggle the task list view in your terminal. The display shows up to 10 tasks at a time with status indicators.
+
+Create the following tasks using the `TaskCreate` tool:
+
+| Task | Description |
+|------|-------------|
+| **Authenticate to Databricks** | Verify Databricks CLI authentication and validate the selected profile |
+| **Download original agent artifacts** | Download the MLflow model artifacts from Model Serving endpoint |
+| **Analyze and understand agent code** | Examine the original agent code, identify tools, resources, and dependencies |
+| **Migrate agent code to Apps format** | Transform ResponsesAgent class to @invoke/@stream decorated functions |
+| **Set up and configure the app** | Install dependencies, run quickstart, configure environment |
+| **Test agent locally** | Start local server and verify the agent works correctly |
+| **Deploy to Databricks Apps** | Configure databricks.yml resources and deploy with Databricks Asset Bundles |
+| **Test deployed app** | Verify the deployed app responds correctly |
+
+Update task status as you progress:
+- Mark tasks as `in_progress` when starting each step
+- Mark tasks as `completed` when finished
+- This gives the user visibility into migration progress
+
+---
+
+## Step 1: Download the Original Agent Code
+
+> **Task:** Mark "Authenticate to Databricks" as `completed`. Mark "Download original agent artifacts" as `in_progress`.
+>
+> **Note:** The `<profile>` and `<app-name>` values were collected from the user in the "Before You Begin" section. Use them throughout.
+
+Download the original agent code from the Model Serving endpoint. This requires MLflow to access the model artifacts; Step 1.2 runs it through `uv run --with`, so no separate virtual environment is needed.
+
+### 1.1 Get Model Info from Endpoint
+
+If you have a serving endpoint name, extract the model details:
+
+```bash
+# Get endpoint info (remember to include --profile if using non-default)
+databricks serving-endpoints get <endpoint-name> --profile <profile> --output json
+```
+
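+In the response, find the entity that receives 100% of the traffic in `config.traffic_config.routes`, then note that entity's `entity_name` (model name) and `entity_version` under `config.served_entities` (this is `served_entities[0]` when the endpoint serves a single entity).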
+
+### 1.2 Download Model Artifacts
+
+Use `uv run --with` to download artifacts without creating a separate virtual environment. The `mlflow[databricks]` extra includes `boto3` for Unity Catalog artifact access:
+
+```bash
+DATABRICKS_CONFIG_PROFILE=<profile> uv run --no-project \
+  --with "mlflow[databricks]>=2.15.0" \
+  --with "databricks-sdk>=0.30.0" \
+  python3 << 'EOF'
+import mlflow
+
+mlflow.set_tracking_uri("databricks")
+
+# Replace with actual values from step 1.1
+MODEL_NAME = "<model-name>"
+VERSION = "<version>"
+
+print(f"Downloading model: models:/{MODEL_NAME}/{VERSION}")
+mlflow.artifacts.download_artifacts(
+    artifact_uri=f"models:/{MODEL_NAME}/{VERSION}",
+    dst_path="./original_mlflow_model"
+)
+print("Download complete! Artifacts saved to ./original_mlflow_model")
+EOF
+```
+
+### 1.3 Verify Downloaded Artifacts
+
+Check that the key files exist and understand the full structure:
+
+```bash
+# List all downloaded files recursively
+find ./original_mlflow_model -type f | head -50
+
+# Check for MLmodel file (contains resource requirements)
+cat ./original_mlflow_model/MLmodel
+
+# Check for input example (useful for testing)
+cat ./original_mlflow_model/input_example.json 2>/dev/null
+```
+
+**Examine the `/code` folder** - contains all code dependencies logged via `code_paths=["..."]`:
+
+```bash
+# List all code files
+ls -la ./original_mlflow_model/code/
+
+# The main agent is typically agent.py, but there may be additional modules
+find ./original_mlflow_model/code -name "*.py" -type f
+```
+
+**Examine the `/artifacts` folder** (if present) - contains artifacts logged via `artifacts={...}`:
+
+```bash
+# Check for artifacts folder
+ls -la ./original_mlflow_model/artifacts/ 2>/dev/null
+
+# List all artifacts
+find ./original_mlflow_model/artifacts -type f 2>/dev/null
+```
+
+> **Important:** Take note of ALL files in `/code` and `/artifacts`. You will need to copy these to the migrated app and ensure imports still work correctly.
+
+### Expected Output Structure
+
+After successful download, you should have:
+
+```
+./original_mlflow_model/
+├── MLmodel              # Model metadata and resource requirements
+├── code/                # Code logged via code_paths=["..."]
+│   ├── agent.py         # Main agent implementation
+│   ├── utils.py         # (optional) Helper modules
+│   ├── tools.py         # (optional) Custom tool definitions
+│   └── ...              # Any other code dependencies
+├── artifacts/           # (optional) Artifacts logged via artifacts={...}
+│   ├── config.yaml      # (optional) Configuration files
+│   ├── prompts/         # (optional) Prompt templates
+│   └── ...              # Any other artifacts (data files, etc.)
+├── input_example.json   # Sample request for testing
+├── requirements.txt     # Original dependencies
+└── ...
+```
+
+### Key Files to Examine
+
+1. **`code/agent.py`** - Contains the `ResponsesAgent` class with `predict()` and `predict_stream()` methods
+2. **`code/*.py`** - Any additional Python modules the agent imports
+3. **`MLmodel`** - Contains the `resources` section listing required Databricks resources
+4. **`artifacts/`** - Any configuration files, prompts, or data files the agent uses
+5. **`input_example.json`** - Use this to test the migrated agent
+
+### Troubleshooting Model Download
+
+**"Unable to import necessary dependencies to access model version files in Unity Catalog"**
+This means `boto3` is missing. Ensure you're using `mlflow[databricks]` (not just `mlflow`) in the `--with` flag — the `[databricks]` extra includes `boto3`.
+
+**"INVALID_PARAMETER_VALUE" or authentication errors**
+Re-authenticate with Databricks (include profile if non-default):
+```bash
+databricks auth login --profile <profile>
+```
+
+**Wrong workspace / Model not found**
+Make sure you're using the correct profile that corresponds to the workspace where the model is deployed:
+```bash
+# List profiles to see which workspace each points to
+databricks auth profiles
+
+# Verify you can access the workspace
+databricks current-user me --profile <profile>
+
+# List models in that workspace
+databricks registered-models list --profile <profile>
+databricks model-versions list --name "<model-name>" --profile <profile>
+```
+
+---
+
+## Step 2: Understand the Key Transformations
+
+> **Task:** Mark "Download original agent artifacts" as `completed`. Mark "Analyze and understand agent code" as `in_progress`.
+
+### Entry Point Transformation
+
+Whether you migrate to async or sync code, the `ResponsesAgent` class is replaced with decorated functions. The only difference is whether those functions are declared with `async def` or plain `def`.
+
+**Model Serving (OLD):**
+```python
+from mlflow.pyfunc import ResponsesAgent, ResponsesAgentRequest, ResponsesAgentResponse
+
+class MyAgent(ResponsesAgent):
+    def predict(self, request: ResponsesAgentRequest, params=None) -> ResponsesAgentResponse:
+        # Synchronous implementation
+        ...
+        return ResponsesAgentResponse(output=outputs)
+
+    def predict_stream(self, request: ResponsesAgentRequest, params=None):
+        # Synchronous generator
+        for chunk in ...:
+            yield ResponsesAgentStreamEvent(...)
+```
+
+**Apps — Async (if `<async>` = yes):**
+```python
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+    ResponsesAgentStreamEvent,
+)
+
+@invoke()
+async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    # Async implementation - typically calls streaming() and collects results
+    outputs = [
+        event.item
+        async for event in streaming(request)
+        if event.type == "response.output_item.done"
+    ]
+    return ResponsesAgentResponse(output=outputs)
+
+@stream()
+async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    # Async generator
+    async for event in ...:
+        yield event
+```
+
+**Apps — Sync (if `<async>` = no):**
+```python
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+    ResponsesAgentStreamEvent,
+)
+
+@invoke()
+def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    # Same sync logic from original predict(), extracted from the class
+    ...
+    return ResponsesAgentResponse(output=outputs)
+
+@stream()
+def streaming(request: ResponsesAgentRequest):
+    # Same sync generator from original predict_stream(), extracted from the class
+    for chunk in ...:
+        yield ResponsesAgentStreamEvent(...)
+```
+
+### Key Differences
+
+| Aspect | Model Serving | Apps (async) | Apps (sync) |
+|--------|--------------|------|------|
+| Structure | `class MyAgent(ResponsesAgent)` | Decorated functions | Decorated functions |
+| Functions | `def predict()` / `def predict_stream()` | `async def` with `await` | `def` (same as original) |
+| Streaming | Sync generator (`yield`) | Async generator (`async for` / `yield`) | Sync generator (`yield`) |
+| Server | MLflow Model Server | MLflow GenAI Server (FastAPI) | MLflow GenAI Server (FastAPI) |
+| Deployment | `databricks_agents.deploy()` | `databricks bundle deploy` + `bundle run` | `databricks bundle deploy` + `bundle run` |
+
+### Async Patterns (only if `<async>` = yes)
+
+> **Skip this section if the user chose synchronous migration.** The sync path keeps all original I/O calls as-is.
+
+All I/O operations must be converted to async:
+
+```python
+# OLD (sync)
+response = client.chat(messages)
+
+# NEW (async)
+response = await client.achat(messages)
+
+# OLD (sync iteration)
+for chunk in stream:
+    yield chunk
+
+# NEW (async iteration)
+async for chunk in stream:
+    yield chunk
+```
+
+---
+
+## Step 3: Migrate the Agent Code
+
+> **Task:** Mark "Analyze and understand agent code" as `completed`. Mark "Migrate agent code to Apps format" as `in_progress`.
+
+### 3.1 Copy Code Dependencies and Artifacts
+
+The original MLflow model may contain multiple code files and artifacts that need to be migrated.
+
+**Copy all code files from `/code` to `agent_server/`:**
+
+```bash
+# Copy all Python files from original code folder
+cp ./original_mlflow_model/code/*.py ./<app-name>/agent_server/
+
+# If there are subdirectories with code, copy those too
+# cp -r ./original_mlflow_model/code/submodule ./<app-name>/agent_server/
+```
+
+**Copy artifacts (if present):**
+
+```bash
+# Create an artifacts directory in the migrated app if needed
+mkdir -p ./<app-name>/agent_server/artifacts
+
+# Copy all artifacts
+cp -r ./original_mlflow_model/artifacts/* ./<app-name>/agent_server/artifacts/ 2>/dev/null || true
+```
+
+**Fix import paths after copying:**
+
+When code files are moved, imports may break. Check and update imports in all copied files:
+
+```python
+# BEFORE (if files were in different locations):
+from code.utils import helper_function
+from artifacts.prompts import SYSTEM_PROMPT
+
+# AFTER (files are now in agent_server/):
+from agent_server.utils import helper_function
+# Or if in same directory:
+from .utils import helper_function
+
+# For artifacts, update file paths:
+# BEFORE:
+with open("artifacts/config.yaml") as f:
+# AFTER:
+import os
+config_path = os.path.join(os.path.dirname(__file__), "artifacts", "config.yaml")
+with open(config_path) as f:
+```
+
+> **Important:** Review each copied file and ensure all imports resolve correctly. The most common issues are:
+> - Relative imports that assumed a different directory structure
+> - Hardcoded file paths to artifacts
+> - Missing `__init__.py` files for package imports
+
+### 3.2 Extract Configuration
+
+From the original agent code, identify and preserve:
+- **LLM endpoint name** (e.g., `databricks-claude-sonnet-4-5`)
+- **System prompt**
+- **Tool definitions**
+- **Any custom logic**
+
+### 3.3 Update the Agent Entry Point
+
+The approach depends on whether the user chose async or sync migration.
+
+---
+
+#### Path A: Synchronous Migration (`<async>` = no)
+
+This is the minimal-changes path. Extract the logic from the `ResponsesAgent` class, wrap it with `@invoke`/`@stream` decorators, and keep all code synchronous.
+
+Edit `<app-name>/agent_server/agent.py`:
+
+1. **Replace the scaffold with the original agent logic.** The core transformation is extracting the class methods into decorated functions:
+
+```python
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+    ResponsesAgentStreamEvent,
+)
+
+# Move any class __init__ or class-level setup to module level
+# e.g., client initialization, tool setup, etc.
+
+@invoke()
+def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    # Paste the body of the original predict() method here
+    # Remove 'self.' references — replace with module-level variables
+    # Remove 'params' parameter (not used in Apps)
+    ...
+    return ResponsesAgentResponse(output=outputs)
+
+@stream()
+def streaming(request: ResponsesAgentRequest):
+    # Paste the body of the original predict_stream() method here
+    # Remove 'self.' references — replace with module-level variables
+    # Remove 'params' parameter (not used in Apps)
+    for chunk in ...:
+        yield ResponsesAgentStreamEvent(...)
+```
+
+2. **Key changes from class to functions:**
+   - Remove the `class MyAgent(ResponsesAgent):` wrapper
+   - Remove `self` parameter from all methods
+   - Move `__init__` logic (client creation, tool setup) to module-level code
+   - Replace `self.some_attribute` with module-level variables
+   - Add `@invoke()` decorator to the non-streaming function
+   - Add `@stream()` decorator to the streaming function
+
+3. **Keep all other code as-is** — no need to convert sync calls to async, no need to change `for` to `async for`, no need to add `await`.
+
+---
+
+#### Path B: Async Migration (`<async>` = yes)
+
+This path converts all I/O operations to async for higher concurrency. More changes are required, but the result is a more efficient server.
+
+Edit `<app-name>/agent_server/agent.py`:
+
+1. **Update the LLM endpoint:**
+   ```python
+   LLM_ENDPOINT_NAME = "<your-endpoint-from-original>"
+   ```
+
+2. **Update the system prompt:**
+   ```python
+   SYSTEM_PROMPT = """<your-system-prompt-from-original>"""
+   ```
+
+3. **Add your custom tools:**
+   If your original agent had custom tools, add them:
+   ```python
+   from langchain_core.tools import tool
+
+   @tool
+   async def my_custom_tool(arg: str) -> str:
+       """Tool description."""
+       # Your tool logic (make async if needed)
+       return result
+   ```
+
+4. **Convert all I/O to async:**
+   - `def predict()` → `async def non_streaming()`
+   - `def predict_stream()` → `async def streaming()`
+   - `client.chat()` → `await client.achat()`
+   - `for chunk in stream:` → `async for chunk in stream:`
+   - Sync HTTP calls → `await` async equivalents
+
+5. **Preserve any special logic:**
+   Migrate any custom preprocessing, postprocessing, or business logic from the original agent.
+
+---
+
+### 3.4 Handle Stateful Agents
+
+**If original uses checkpointer (short-term memory):**
+- Add checkpointer with Lakebase integration (use `AsyncCheckpointSaver` if async, or sync equivalent if sync)
+- Configure `LAKEBASE_INSTANCE_NAME` in `.env`
+- Extract thread_id from `request.custom_inputs` or `request.context.conversation_id`
+
+**If original uses store (long-term memory):**
+- Add store with Lakebase integration (use `AsyncDatabricksStore` if async, or sync equivalent if sync)
+- Configure `LAKEBASE_INSTANCE_NAME` in `.env`
+- Extract user_id from `request.custom_inputs` or `request.context.user_id`
+
+---
+
+## Step 4: Set Up the App
+
+> **Task:** Mark "Migrate agent code to Apps format" as `completed`. Mark "Set up and configure the app" as `in_progress`.
+
+### 4.1 Verify Build Configuration
+
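+Before installing dependencies, make sure a `README.md` exists in the `<app-name>` directory (next to `pyproject.toml`); the hatchling build fails without one.
+
+**Create a minimal README if it is missing (run from inside `<app-name>`):**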
+
+```bash
+# Create a minimal README if one doesn't exist
+if [ ! -f "README.md" ]; then
+  echo "# Migrated Agent App" > README.md
+fi
+```
+
+### 4.2 Install Dependencies
+
+```bash
+cd <app-name>
+uv sync
+```
+
+### 4.3 Create requirements.txt for Databricks Apps
+
+Databricks Apps requires a `requirements.txt` file with `uv` to install dependencies from `pyproject.toml`:
+
+```bash
+echo "uv" > requirements.txt
+```
+
+### 4.4 Run Quickstart
+
+Run `uv run quickstart` to set up your local environment. This is the **recommended** way to configure the app because it handles all of the necessary setup automatically.
+
+```bash
+uv run quickstart
+```
+
+This script will:
+
+1. Verify uv, nvm, and Databricks CLI installations
+2. Configure Databricks authentication
+3. Configure agent tracing, by creating and linking an MLflow experiment to your app
+4. Configure `.env` with the necessary environment variables
+
+> **Important:** The quickstart script creates the MLflow experiment that the app needs for logging traces and models. This experiment will be added as a resource when deploying the app.
+
+If there are issues with the quickstart script, refer to the manual setup in section 4.5.
+
+### 4.5 Manual Environment Configuration (Optional)
+
+If you need to manually configure the environment or add additional variables, edit `.env`:
+
+```bash
+# Databricks authentication
+DATABRICKS_CONFIG_PROFILE=<your-profile>
+
+# MLflow experiment (created by quickstart, or create manually)
+MLFLOW_EXPERIMENT_ID=<experiment-id>
+
+# Example: Lakebase for stateful agents
+LAKEBASE_INSTANCE_NAME=<your-lakebase-instance>
+
+# Example: Custom API keys
+MY_API_KEY=<value>
+```
+
+To manually create an MLflow experiment:
+
+```bash
+databricks experiments create-experiment "/Users/<your-username>/<app-name>" --profile <profile>
+```
+
+---
+
+## Step 5: Test Locally
+
+> **Task:** Mark "Set up and configure the app" as `completed`. Mark "Test agent locally" as `in_progress`.
+
+> Test your migrated agent locally before deploying to Databricks Apps. This helps catch configuration issues early and ensures the agent works correctly.
+
+### 5.1 Start the Server
+
+After the quickstart setup is complete, start the agent server and chat app locally:
+
+```bash
+cd <app-name>
+uv run start-app
+```
+
+Wait for the server to start. You should see output indicating the server is running on `http://localhost:8000`.
+
+> **Note:** If you only need the API endpoint (without the chat UI), you can run `uv run start-server` instead.
+
+### 5.2 Test with Original Input Example
+
+The original model artifacts may include an `input_example.json` file containing a sample request. Use it to verify that your migrated agent behaves the same as the original. If the file is missing or its request is not valid, construct a valid sample request based on the agent's code.
+
+```bash
+# Check the original input example (from the <app-name> directory)
+cat ../original_mlflow_model/input_example.json
+```
+
+Example content:
+```json
+{"input": [{"role": "user", "content": "What is an LLM agent?"}], "custom_inputs": {"thread_id": "example-thread-123"}}
+```
+
+Test your local server with this input:
+
+```bash
+# Test with the original input example
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d "$(cat ../original_mlflow_model/input_example.json)"
+```
+
+### 5.3 Test Basic Requests
+
+```bash
+# Non-streaming
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hello!"}]}'
+
+# Streaming
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hello!"}], "stream": true}'
+```
+
+### 5.4 Test with Custom Inputs (for stateful agents)
+
+```bash
+# With thread_id for short-term memory
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"thread_id": "test-123"}}'
+
+# With user_id for long-term memory
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hi"}], "custom_inputs": {"user_id": "user@example.com"}}'
+```
+
+### 5.5 Verify Before Proceeding
+
+Before proceeding to deployment, ensure:
+- [ ] The server starts without errors
+- [ ] The original input example returns a valid response
+- [ ] Streaming responses work correctly
+- [ ] Custom inputs (thread_id, user_id) are handled properly (if applicable)
+
+> **Note:** Only proceed to Step 6 (Deploy) after confirming the agent works correctly locally.
+
+---
+
+## Step 6: Deploy to Databricks Apps
+
+> **Task:** Mark "Test agent locally" as `completed`. Mark "Deploy to Databricks Apps" as `in_progress`.
+
+This step uses Databricks Asset Bundles (DAB) to deploy. The scaffold includes a `databricks.yml` that you need to update with the app name and resources from the original model.
+
+### 6.1 Extract Resources from Original Model
+
+The original model's `MLmodel` file contains a `resources` section that lists all Databricks resources the agent needs access to. Check `../original_mlflow_model/MLmodel` (or `./original_mlflow_model/MLmodel` if you're in the parent directory) for content like:
+
+```yaml
+resources:
+  api_version: '1'
+  databricks:
+    lakebase:
+    - name: lakebase
+    serving_endpoint:
+    - name: databricks-claude-sonnet-4-5
+```
+
+### 6.2 Update `databricks.yml` with Resources
+
+The scaffold includes a `databricks.yml` with the experiment resource pre-configured. You need to:
+
+1. **Update the app name** to `<app-name>` (the name provided by the user) in both the `resources.apps.agent_migration.name` field and the `targets.prod.resources.apps.agent_migration.name` field.
+2. **Add resources** extracted from the original MLmodel file to the `resources.apps.agent_migration.resources` list.
+
+**Resource Type Mapping (MLmodel → `databricks.yml`):**
+
+| MLmodel Resource | `databricks.yml` Resource | Key Fields |
+|------------------|--------------------------|------------|
+| `serving_endpoint` | `serving_endpoint` | `name`, `permission` (CAN_QUERY) |
+| `lakebase` | `database` | `database_name: databricks_postgres`, `instance_name`, `permission` (CAN_CONNECT_AND_CREATE) |
+| `vector_search_index` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` |
+| `function` | `uc_securable` | `securable_full_name`, `securable_type: FUNCTION`, `permission: EXECUTE` |
+| `table` | `uc_securable` | `securable_full_name`, `securable_type: TABLE`, `permission: SELECT` |
+| `uc_connection` | `uc_securable` | `securable_full_name`, `securable_type: CONNECTION`, `permission: USE_CONNECTION` |
+| `sql_warehouse` | `sql_warehouse` | `id`, `permission` (CAN_USE) |
+| `genie_space` | `genie_space` | `space_id`, `permission` (CAN_RUN) |
+
+> **Note:** The `experiment` resource is already configured in the scaffold `databricks.yml` and is automatically created by the bundle. You do not need to add it manually.
+
+**Example: `databricks.yml` for an agent with a serving endpoint and UC function:**
+
+```yaml
+resources:
+  experiments:
+    agent_migration_experiment:
+      name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target}
+
+  apps:
+    agent_migration:
+      name: "<app-name>"  # Update to user's app name
+      description: "Migrated agent from Model Serving to Databricks Apps"
+      source_code_path: ./
+      resources:
+        - name: 'experiment'
+          experiment:
+            experiment_id: "${resources.experiments.agent_migration_experiment.id}"
+            permission: 'CAN_MANAGE'
+        - name: 'serving-endpoint'
+          serving_endpoint:
+            name: 'databricks-claude-sonnet-4-5'
+            permission: 'CAN_QUERY'
+        - name: 'python-exec'
+          uc_securable:
+            securable_full_name: 'system.ai.python_exec'
+            securable_type: 'FUNCTION'
+            permission: 'EXECUTE'
+
+targets:
+  prod:
+    resources:
+      apps:
+        agent_migration:
+          name: "<app-name>"  # Same name for production
+```
+
+**Example: Adding Lakebase resources (for stateful agents):**
+
+```yaml
+        - name: 'database'
+          database:
+            database_name: 'databricks_postgres'
+            instance_name: 'lakebase'
+            permission: 'CAN_CONNECT_AND_CREATE'
+```
+
+### 6.3 Deploy with Databricks Asset Bundles
+
+From inside the `<app-name>` directory, validate, deploy, and run:
+
+```bash
+# 1. Validate bundle configuration (catches errors before deploy)
+databricks bundle validate --profile <profile>
+
+# 2. Deploy the bundle (creates/updates resources, uploads files)
+databricks bundle deploy --profile <profile>
+
+# 3. Run the app (starts/restarts with uploaded source code) - REQUIRED!
+databricks bundle run agent_migration --profile <profile>
+```
+
+> **Important:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code!
+
+### 6.4 Test Deployed App
+
+> **Task:** Mark "Deploy to Databricks Apps" as `completed`. Mark "Test deployed app" as `in_progress`.
+
+```bash
+# Get the app URL
+APP_URL=$(databricks apps get <app-name> --profile <profile> --output json | jq -r '.url')
+
+# Get OAuth token
+TOKEN=$(databricks auth token --profile <profile> | jq -r .access_token)
+
+# Query the app
+curl -X POST ${APP_URL}/invocations \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"input": [{"role": "user", "content": "Hello!"}]}'
+```
+
+Once the deployed app responds successfully:
+
+> **Task:** Mark "Test deployed app" as `completed`. Migration complete!
+
+### 6.5 Deployment Troubleshooting
+
+If you encounter issues during deployment, refer to the **deploy** skill for detailed guidance.
+
+**Debug commands:**
+```bash
+# Validate bundle configuration
+databricks bundle validate --profile <profile>
+
+# View app logs
+databricks apps logs <app-name> --profile <profile> --follow
+
+# Check app status
+databricks apps get <app-name> --profile <profile> --output json | jq '{app_status, compute_status}'
+
+# Get app URL
+databricks apps get <app-name> --profile <profile> --output json | jq -r '.url'
+```
+
+**"App already exists" error:**
+If `databricks bundle deploy` fails because the app already exists, refer to the **deploy** skill for instructions on binding an existing app to the bundle.
+
+---
+
+## Reference: App File Structure
+
+```
+<app-name>/
+├── agent_server/
+│   ├── __init__.py
+│   ├── agent.py          # Main agent logic - THIS IS WHERE YOU MIGRATE TO
+│   ├── start_server.py   # FastAPI server setup
+│   ├── utils.py          # Helper utilities
+│   └── evaluate_agent.py # Agent evaluation
+├── scripts/
+│   ├── __init__.py
+│   ├── quickstart.py     # Setup script
+│   └── start_app.py      # App startup
+├── databricks.yml        # Databricks Asset Bundle configuration (resources, config, targets)
+├── pyproject.toml        # Dependencies (for local dev with uv)
+├── requirements.txt      # REQUIRED: Must contain "uv" for Databricks Apps
+├── .env.example          # Environment template
+└── README.md
+```
+
+> **IMPORTANT:** The `requirements.txt` file must exist and contain `uv` so that Databricks Apps can install dependencies using the `pyproject.toml`. Without this file, the app will fail to start.
+
+---
+
+## Reference: Common Migration Patterns
+
+### Pattern 1: Simple Chat Agent
+
+**Original:**
+```python
+class ChatAgent(ResponsesAgent):
+    def predict(self, request, params=None):
+        messages = to_chat_completions_input(request.input)
+        response = self.llm.invoke(messages)
+        return ResponsesAgentResponse(output=[...])
+```
+
+**Migrated (sync):**
+```python
+llm = ...  # Move class-level init to module level
+
+@invoke()
+def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    messages = to_chat_completions_input(request.input)
+    response = llm.invoke(messages)
+    return ResponsesAgentResponse(output=[...])
+
+@stream()
+def streaming(request: ResponsesAgentRequest):
+    # Original predict_stream() body, with self. removed
+    ...
+```
+
+**Migrated (async):**
+```python
+@invoke()
+async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    outputs = [e.item async for e in streaming(request) if e.type == "response.output_item.done"]
+    return ResponsesAgentResponse(output=outputs)
+
+@stream()
+async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    messages = {"messages": to_chat_completions_input([i.model_dump() for i in request.input])}
+    agent = await init_agent()
+    async for event in process_agent_astream_events(agent.astream(messages, stream_mode=["updates", "messages"])):
+        yield event
+```
+
+### Pattern 2: Agent with Custom Tools
+
+**Sync:** Keep tools as-is from the original code.
+
+**Async:** Migrate tools to async LangChain tools:
+
+```python
+from langchain_core.tools import tool
+
+@tool
+async def search_docs(query: str) -> str:
+    """Search the documentation."""
+    results = await vector_store.asimilarity_search(query)
+    return format_results(results)
+```
+
+### Pattern 3: Using LangGraph with create_agent (async only)
+
+```python
+from langchain.agents import create_agent
+from databricks_langchain import ChatDatabricks
+
+async def init_agent():
+    tools = await mcp_client.get_tools()  # MCP tools are async
+    model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)
+    return create_agent(model=model, tools=tools, system_prompt=SYSTEM_PROMPT)
+```
+
+---
+
+## Reference: Useful Resources
+
+- **Responses API Docs:** https://mlflow.org/docs/latest/genai/serving/responses-agent/
+- **Agent Framework:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/
+- **Agent Tools:** https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool
+- **databricks-langchain SDK:** https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain
+
+---
+
+## Troubleshooting
+
+### "Module not found" errors
+```bash
+uv sync  # Reinstall dependencies
+```
+
+### Authentication errors
+```bash
+databricks auth login  # Re-authenticate
+```
+
+### Lakebase permission errors
+- Ensure the Lakebase instance is added as an app resource (a `database` resource in `databricks.yml`, see section 6.2, or via the Databricks UI)
+- Grant appropriate permissions on the Lakebase instance
+
+### Async errors (async migration only)
+- Ensure all I/O calls use async versions (e.g., `await client.achat()` not `client.chat()`)
+- Use `async for` instead of `for` when iterating async generators
+- If you chose sync migration, these errors should not occur — double-check that you're not mixing sync and async patterns
diff --git a/skills/migrate-from-model-serving/agents/openai.yaml b/skills/migrate-from-model-serving/agents/openai.yaml
new file mode 100644
index 0000000..62547c7
--- /dev/null
+++ b/skills/migrate-from-model-serving/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Migrate from Model Serving"
+  short_description: "Migrate ResponsesAgent to Apps"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $migrate-from-model-serving for migrating an MLflow ResponsesAgent from Model Serving to Databricks Apps."
diff --git a/skills/migrate-from-model-serving/assets/databricks.png b/skills/migrate-from-model-serving/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/migrate-from-model-serving/assets/databricks.png differ
diff --git a/skills/migrate-from-model-serving/assets/databricks.svg b/skills/migrate-from-model-serving/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/migrate-from-model-serving/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/modify-langgraph-agent/SKILL.md b/skills/modify-langgraph-agent/SKILL.md
new file mode 100644
index 0000000..bc4edbf
--- /dev/null
+++ b/skills/modify-langgraph-agent/SKILL.md
@@ -0,0 +1,299 @@
+---
+name: modify-langgraph-agent
+description: "Modify agent code, add tools, or change configuration. Use when: (1) User says 'modify agent', 'add tool', 'change model', or 'edit agent.py', (2) Adding MCP servers to agent, (3) Changing agent instructions, (4) Understanding SDK patterns."
+metadata:
+  version: "0.0.1"
+---
+
+# Modify the Agent
+
+## Main File
+
+**`agent_server/agent.py`** - Agent logic, model selection, instructions, MCP servers
+
+## Key Files
+
+| File                             | Purpose                                       |
+| -------------------------------- | --------------------------------------------- |
+| `agent_server/agent.py`          | Agent logic, model, instructions, MCP servers |
+| `agent_server/start_server.py`   | FastAPI server + MLflow setup                 |
+| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers          |
+| `agent_server/utils.py`          | Databricks auth helpers, stream processing    |
+| `databricks.yml`                 | Bundle config & resource permissions          |
+
+## SDK Setup
+
+```python
+import mlflow
+from databricks.sdk import WorkspaceClient
+from databricks_langchain import ChatDatabricks, DatabricksMCPServer, DatabricksMultiServerMCPClient
+from langchain.agents import create_agent
+
+# Enable autologging for tracing
+mlflow.langchain.autolog()
+
+# Initialize workspace client
+workspace_client = WorkspaceClient()
+```
+
+---
+
+## databricks-langchain SDK Overview
+
+**SDK Location:** https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain
+
+Before making any changes, ensure that the APIs actually exist in the SDK. If something is missing from the documentation here, look in the venv's `site-packages` directory for the `databricks_langchain` package. If it's not installed, run `uv sync` to create the .venv and install the package.
+
+---
+
+### ChatDatabricks - LLM Chat Interface
+
+Connects to Databricks Model Serving endpoints for LLM inference.
+
+```python
+from databricks_langchain import ChatDatabricks
+
+llm = ChatDatabricks(
+    endpoint="databricks-claude-3-7-sonnet",  # or databricks-meta-llama-3-3-70b-instruct
+    temperature=0,
+    max_tokens=500,
+)
+
+# For Responses API agents:
+llm = ChatDatabricks(endpoint="my-agent-endpoint", use_responses_api=True)
+```
+
+Available models (check workspace for current list):
+
+- `databricks-claude-3-7-sonnet`
+- `databricks-claude-3-5-sonnet`
+- `databricks-meta-llama-3-3-70b-instruct`
+
+**Note:** Some workspaces require granting the app access to the serving endpoint in `databricks.yml`. See the **add-tools-langgraph** skill and its `examples/serving-endpoint.yaml`.
+
+---
+
+### DatabricksEmbeddings - Generate Embeddings
+
+Query Databricks embedding model endpoints.
+
+```python
+from databricks_langchain import DatabricksEmbeddings
+
+embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
+vector = embeddings.embed_query("The meaning of life is 42")
+vectors = embeddings.embed_documents(["doc1", "doc2"])
+```
+
+---
+
+### DatabricksVectorSearch - Vector Store
+
+Connect to Databricks Vector Search indexes for similarity search.
+
+```python
+from databricks_langchain import DatabricksVectorSearch
+
+# Delta-sync index with Databricks-managed embeddings
+vs = DatabricksVectorSearch(index_name="catalog.schema.index_name")
+
+# Direct-access or self-managed embeddings
+vs = DatabricksVectorSearch(
+    index_name="catalog.schema.index_name",
+    embedding=embeddings,
+    text_column="content",
+)
+
+docs = vs.similarity_search("query", k=5)
+```
+
+---
+
+### MCP Client - Tool Integration
+
+Connect to MCP (Model Context Protocol) servers to get tools for your agent.
+
+**Basic MCP Server (manual URL):**
+
+```python
+from databricks_langchain import DatabricksMCPServer, DatabricksMultiServerMCPClient
+
+client = DatabricksMultiServerMCPClient([
+    DatabricksMCPServer(
+        name="system-ai",
+        url=f"{host}/api/2.0/mcp/functions/system/ai",
+    )
+])
+tools = await client.get_tools()
+```
+
+**From UC Function (convenience helper):**
+
+Creates MCP server for Unity Catalog functions. If `function_name` is omitted, exposes all functions in the schema.
+
+```python
+server = DatabricksMCPServer.from_uc_function(
+    catalog="main",
+    schema="tools",
+    function_name="send_email",  # Optional - omit for all functions in schema
+    name="email-server",
+    timeout=30.0,
+    handle_tool_error=True,
+)
+```
+
+**From Vector Search (convenience helper):**
+
+Creates MCP server for Vector Search indexes. If `index_name` is omitted, exposes all indexes in the schema.
+
+```python
+server = DatabricksMCPServer.from_vector_search(
+    catalog="main",
+    schema="embeddings",
+    index_name="product_docs",  # Optional - omit for all indexes in schema
+    name="docs-search",
+    timeout=30.0,
+)
+```
+
+**From Genie Space:**
+
+Create MCP server from Genie Space. Get the genie space ID from the URL.
+
+Example: `https://workspace.cloud.databricks.com/genie/rooms/01f0515f6739169283ef2c39b7329700?o=123` means the genie space ID is `01f0515f6739169283ef2c39b7329700`
+
+```python
+DatabricksMCPServer(
+    name="genie",
+    url=f"{host_name}/api/2.0/mcp/genie/01f0515f6739169283ef2c39b7329700",
+)
+```
+
+**Non-Databricks MCP Server:**
+
+```python
+from databricks_langchain import MCPServer
+
+server = MCPServer(
+    name="external-server",
+    url="https://other-server.com/mcp",
+    headers={"X-API-Key": "secret"},
+    timeout=15.0,
+)
+```
+
+**After adding MCP servers:** Grant permissions in `databricks.yml` (see the **add-tools-langgraph** skill)
+
+---
+
+## Running the Agent
+
+```python
+from langchain.agents import create_agent
+
+# Create agent - ONLY accepts tools and model, NO prompt/instructions parameter
+agent = create_agent(tools=tools, model=llm)
+
+# Non-streaming
+messages = {"messages": [{"role": "user", "content": "hi"}]}
+result = await agent.ainvoke(messages)
+
+# Streaming
+async for event in agent.astream(input=messages, stream_mode=["updates", "messages"]):
+    # Process stream events
+    pass
+```
+
+**Converting to Responses API format:** Use `process_agent_astream_events()` from `agent_server/utils.py`:
+
+```python
+from agent_server.utils import process_agent_astream_events
+
+async for event in process_agent_astream_events(
+    agent.astream(input=messages, stream_mode=["updates", "messages"])
+):
+    yield event  # Yields ResponsesAgentStreamEvent objects
+```
+
+---
+
+## Customizing Agent Behavior (System Instructions)
+
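+> **IMPORTANT:** `create_agent()` does NOT accept `prompt`, `instructions`, or `system_message` parameters. Attempting to pass these will cause a runtime error. (Some LangChain releases accept a `system_prompt` keyword instead; confirm it exists in the installed version before relying on it.)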
+
+In LangGraph, agent behavior is customized by prepending a system message to the conversation messages.
+
+**Correct pattern in `agent.py`:**
+
+1. Define instructions as a constant:
+
+```python
+AGENT_INSTRUCTIONS = """You are a helpful data analyst assistant.
+
+You have access to:
+- Company sales data via Genie
+- Product documentation via vector search
+
+Always cite your sources when answering questions."""
+```
+
+2. Prepend to messages in the `streaming()` function:
+
+```python
+@stream()
+async def streaming(request: ResponsesAgentRequest) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    agent = await init_agent()
+    # Prepend system instructions to user messages
+    user_messages = to_chat_completions_input([i.model_dump() for i in request.input])
+    messages = {"messages": [{"role": "system", "content": AGENT_INSTRUCTIONS}] + user_messages}
+
+    async for event in process_agent_astream_events(
+        agent.astream(input=messages, stream_mode=["updates", "messages"])
+    ):
+        yield event
+```
+
+**Common mistake to avoid:**
+
+```python
+# WRONG - will cause "unexpected keyword argument" error
+agent = create_agent(tools=tools, model=llm, prompt=AGENT_INSTRUCTIONS)
+
+# CORRECT - add instructions via messages
+messages = {"messages": [{"role": "system", "content": AGENT_INSTRUCTIONS}] + user_messages}
+```
+
+For advanced customization (routing, state management, custom graphs), refer to the [LangGraph documentation](https://docs.langchain.com/oss/python/langgraph/overview).
+
+---
+
+## External Connection Tools
+
+Connect to external services via Unity Catalog HTTP connections:
+
+- **Slack** - Post messages to channels
+- **Google Calendar** - Calendar operations
+- **Microsoft Graph API** - Office 365 services
+- **Azure AI Search** - Search functionality
+- **Any HTTP API** - Use `http_request` from databricks-sdk
+
+Example: Create a UC function that wraps an HTTP request to Slack, then expose it to the agent via MCP.
+
+---
+
+## External Resources
+
+1. [databricks-langchain SDK](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain)
+2. [Agent examples](https://github.com/databricks/app-templates)
+3. [Agent Framework docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/)
+4. [Adding tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool)
+5. [LangGraph documentation](https://docs.langchain.com/oss/python/langgraph/overview)
+6. [Responses API](https://mlflow.org/docs/latest/genai/serving/responses-agent/)
+
+## Next Steps
+
+- Discover available tools: see **discover-tools** skill
+- Grant resource permissions: see **add-tools-langgraph** skill
+- Add memory capabilities: see **agent-langgraph-memory** skill
+- Test locally: see **run-locally** skill
+- Deploy: see **deploy** skill
diff --git a/skills/modify-langgraph-agent/agents/openai.yaml b/skills/modify-langgraph-agent/agents/openai.yaml
new file mode 100644
index 0000000..a2e333c
--- /dev/null
+++ b/skills/modify-langgraph-agent/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Modify Agent (LangGraph)"
+  short_description: "Edit LangGraph agent code"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $modify-langgraph-agent for modifying agent code, tools, or configuration in a LangGraph template."
diff --git a/skills/modify-langgraph-agent/assets/databricks.png b/skills/modify-langgraph-agent/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/modify-langgraph-agent/assets/databricks.png differ
diff --git a/skills/modify-langgraph-agent/assets/databricks.svg b/skills/modify-langgraph-agent/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/modify-langgraph-agent/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/modify-openai-agent/SKILL.md b/skills/modify-openai-agent/SKILL.md
new file mode 100644
index 0000000..ee2e4f9
--- /dev/null
+++ b/skills/modify-openai-agent/SKILL.md
@@ -0,0 +1,149 @@
+---
+name: modify-openai-agent
+description: "Modify agent code, add tools, or change configuration. Use when: (1) User says 'modify agent', 'add tool', 'change model', or 'edit agent.py', (2) Adding MCP servers to agent, (3) Changing agent instructions, (4) Understanding SDK patterns."
+metadata:
+  version: "0.0.1"
+---
+
+# Modify the Agent
+
+## Main File
+
+**`agent_server/agent.py`** - Agent logic, model selection, instructions, MCP servers
+
+## Key Files
+
+| File                             | Purpose                                       |
+| -------------------------------- | --------------------------------------------- |
+| `agent_server/agent.py`          | Agent logic, model, instructions, MCP servers |
+| `agent_server/start_server.py`   | FastAPI server + MLflow setup                 |
+| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers          |
+| `agent_server/utils.py`          | Databricks auth helpers, stream processing    |
+| `databricks.yml`                 | Bundle config & resource permissions          |
+
+## SDK Setup
+
+```python
+import mlflow
+from databricks_openai import AsyncDatabricksOpenAI
+from agents import set_default_openai_api, set_default_openai_client, Agent
+from agents.tracing import set_trace_processors
+
+# Set up async client (recommended for agent servers)
+set_default_openai_client(AsyncDatabricksOpenAI())
+set_default_openai_api("chat_completions")
+
+# Use MLflow for tracing (disables SDK's built-in tracing)
+set_trace_processors([])
+mlflow.openai.autolog()
+```
+
+## Adding MCP Servers
+
+```python
+from databricks_openai.agents import McpServer
+
+# UC Functions
+uc_server = McpServer(
+    url=f"{host}/api/2.0/mcp/functions/{catalog}/{schema}",
+    name="uc functions",
+)
+
+# Genie Space
+genie_server = McpServer(
+    url=f"{host}/api/2.0/mcp/genie/{space_id}",
+    name="genie space",
+)
+
+# Vector Search
+vector_server = McpServer(
+    url=f"{host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index}",
+    name="vector search",
+)
+
+# Add to agent
+agent = Agent(
+    name="my agent",
+    instructions="You are a helpful agent.",
+    model="databricks-claude-3-7-sonnet",
+    mcp_servers=[uc_server, genie_server, vector_server],
+)
+```
+
+**After adding MCP servers:** Grant permissions in `databricks.yml` (see **add-tools-openai** skill)
+
+## Changing the Model
+
+Available models (check workspace for current list):
+
+- `databricks-claude-3-7-sonnet`
+- `databricks-claude-3-5-sonnet`
+- `databricks-meta-llama-3-3-70b-instruct`
+
+```python
+agent = Agent(
+    name="my agent",
+    model="databricks-claude-3-7-sonnet",  # Change here
+    ...
+)
+```
+
+**Note:** Some workspaces require granting the app access to the serving endpoint in `databricks.yml`. See the **add-tools-openai** skill and `examples/serving-endpoint.yaml`.
+
+## Changing Instructions
+
+```python
+agent = Agent(
+    name="my agent",
+    instructions="""You are a helpful data analyst assistant.
+
+    You have access to:
+    - Company sales data via Genie
+    - Product documentation via vector search
+
+    Always cite your sources when answering questions.""",
+    ...
+)
+```
+
+## Running the Agent
+
+```python
+from agents import Runner
+
+# Non-streaming
+messages = [{"role": "user", "content": "hi"}]
+result = await Runner.run(agent, messages)
+
+# Streaming
+result = Runner.run_streamed(agent, input=messages)
+async for event in result.stream_events():
+    # Process stream events
+    pass
+```
+
+**Converting to Responses API format:** Use `process_agent_stream_events()` from `agent_server/utils.py` to convert streaming output to Responses API compatible format:
+
+```python
+from agent_server.utils import process_agent_stream_events
+
+result = Runner.run_streamed(agent, input=messages)
+async for event in process_agent_stream_events(result.stream_events()):
+    yield event  # Yields ResponsesAgentStreamEvent objects
+```
+
+## External Resources
+
+1. [databricks-openai SDK](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/openai)
+2. [Agent examples](https://github.com/databricks/app-templates)
+3. [Agent Framework docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/)
+4. [Adding tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool)
+5. [OpenAI Agents SDK](https://platform.openai.com/docs/guides/agents-sdk)
+6. [Responses API](https://mlflow.org/docs/latest/genai/serving/responses-agent/)
+
+## Next Steps
+
+- Discover available tools: see **discover-tools** skill
+- Grant resource permissions: see **add-tools-openai** skill
+- Test locally: see **run-locally** skill
+- Deploy: see **deploy** skill
diff --git a/skills/modify-openai-agent/agents/openai.yaml b/skills/modify-openai-agent/agents/openai.yaml
new file mode 100644
index 0000000..a58ac3e
--- /dev/null
+++ b/skills/modify-openai-agent/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Modify Agent (OpenAI)"
+  short_description: "Edit OpenAI agent code"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $modify-openai-agent for modifying agent code, tools, or configuration in an OpenAI Agents SDK template."
diff --git a/skills/modify-openai-agent/assets/databricks.png b/skills/modify-openai-agent/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/modify-openai-agent/assets/databricks.png differ
diff --git a/skills/modify-openai-agent/assets/databricks.svg b/skills/modify-openai-agent/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/modify-openai-agent/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/quickstart/SKILL.md b/skills/quickstart/SKILL.md
new file mode 100644
index 0000000..230ff20
--- /dev/null
+++ b/skills/quickstart/SKILL.md
@@ -0,0 +1,120 @@
+---
+name: quickstart
+description: "Set up Databricks agent development environment. Use when: (1) First time setup, (2) Configuring Databricks authentication, (3) User says 'quickstart', 'set up', 'authenticate', or 'configure databricks', (4) No .env file exists."
+metadata:
+  version: "0.0.1"
+---
+
+# Quickstart & Authentication
+
+## Prerequisites
+
+- **uv** (Python package manager)
+- **nvm** with Node 20 (for frontend)
+- **Databricks CLI v0.283.0+**
+
+Check CLI version:
+```bash
+databricks -v  # Must be v0.283.0 or above
+brew upgrade databricks  # If version is too old
+```
+
+## Run Quickstart
+
+```bash
+uv run quickstart
+```
+
+**Options:**
+- `--profile NAME`: Use specified profile (non-interactive)
+- `--host URL`: Workspace URL for initial setup
+{{LAKEBASE_OPTIONS}}- `--skip-lakebase`: Skip Lakebase setup (non-interactive / CI use)
+- `--app-name NAME`: Existing Databricks app name to bind this bundle to
+- `-h, --help`: Show help
+
+**Examples:**
+```bash
+# Interactive (prompts for profile selection)
+uv run quickstart
+
+# Non-interactive with existing profile
+uv run quickstart --profile DEFAULT
+
+# New workspace setup
+uv run quickstart --host https://your-workspace.cloud.databricks.com
+
+# Bind to an existing app created via the Databricks UI
+uv run quickstart --app-name my-existing-app
+
+# Skip Lakebase setup (CI / non-interactive)
+uv run quickstart --profile DEFAULT --skip-lakebase
+{{LAKEBASE_EXAMPLES}}```
+
+## What Quickstart Configures
+
+Creates/updates `.env` with:
+- `DATABRICKS_CONFIG_PROFILE` - Selected CLI profile
+- `MLFLOW_TRACKING_URI` - Set to `databricks://<profile-name>` for local auth
+- `MLFLOW_EXPERIMENT_ID` - Auto-created experiment ID
+{{LAKEBASE_CONFIGURES_ENV}}
+Updates `databricks.yml`:
+- Sets `experiment_id` in the app's experiment resource
+- Updates app `name` field if `--app-name` is provided
+{{LAKEBASE_CONFIGURES_YML}}
+
+## Existing App
+
+If you created an app via the Databricks UI before cloning a template, use `--app-name` to bind the bundle to it:
+
+```bash
+uv run quickstart --app-name my-existing-app
+```
+
+Quickstart will update `databricks.yml` with the app name and print the binding command:
+```bash
+databricks bundle deployment bind <KEY> my-existing-app --auto-approve
+databricks bundle deploy
+```
+
+This avoids the "An app with the same name already exists" error on first deploy.
+
+## Idempotency
+
+Re-running quickstart is safe:
+- **Experiment**: If `MLFLOW_EXPERIMENT_ID` is already in `.env` and the experiment still exists, it is reused (no duplicate created).
+- **Lakebase**: If Lakebase config is already in `.env`, the interactive prompt is skipped and the existing config is reused.
+
+## Manual Authentication (Fallback)
+
+If quickstart fails:
+
+```bash
+# Create new profile
+databricks auth login --host https://your-workspace.cloud.databricks.com
+
+# Verify
+databricks auth profiles
+```
+
+Then manually create `.env` (copy from `.env.example`):
+```bash
+# Authentication (choose one method)
+DATABRICKS_CONFIG_PROFILE=DEFAULT
+# DATABRICKS_HOST=https://<your-workspace-here>.databricks.com
+# DATABRICKS_TOKEN=dapi....
+
+# MLflow configuration
+MLFLOW_EXPERIMENT_ID=<your-experiment-id>
+MLFLOW_TRACKING_URI="databricks://DEFAULT"
+MLFLOW_REGISTRY_URI="databricks-uc"
+
+# Frontend proxy settings
+CHAT_APP_PORT=3000
+CHAT_PROXY_TIMEOUT_SECONDS=300
+```
+
+## Next Steps
+
+After quickstart completes:
+1. Run `uv run discover-tools` to find available workspace resources (see **discover-tools** skill)
+2. Run `uv run start-app` to test locally (see **run-locally** skill)
diff --git a/skills/quickstart/agents/openai.yaml b/skills/quickstart/agents/openai.yaml
new file mode 100644
index 0000000..c794ea4
--- /dev/null
+++ b/skills/quickstart/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Agent Quickstart"
+  short_description: "Set up Databricks agent dev environment"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $quickstart for setting up a Databricks agent development environment (authentication, .env, MLflow)."
diff --git a/skills/quickstart/assets/databricks.png b/skills/quickstart/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/quickstart/assets/databricks.png differ
diff --git a/skills/quickstart/assets/databricks.svg b/skills/quickstart/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/quickstart/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/run-locally/SKILL.md b/skills/run-locally/SKILL.md
new file mode 100644
index 0000000..d4d89a0
--- /dev/null
+++ b/skills/run-locally/SKILL.md
@@ -0,0 +1,92 @@
+---
+name: run-locally
+description: "Run and test the agent locally. Use when: (1) User says 'run locally', 'start server', 'test agent', or 'localhost', (2) Need curl commands to test API, (3) Troubleshooting local development issues, (4) Configuring server options like port or hot-reload."
+metadata:
+  version: "0.0.1"
+---
+
+# Run Agent Locally
+
+## Start the Server
+
+```bash
+uv run start-app
+```
+
+This starts the agent at http://localhost:8000
+
+## Server Options
+
+```bash
+# Hot-reload on code changes (development)
+uv run start-server --reload
+
+# Custom port
+uv run start-server --port 8001
+
+# Multiple workers (production-like)
+uv run start-server --workers 4
+
+# Combine options
+uv run start-server --reload --port 8001
+```
+
+## Test the API
+
+**Streaming request:**
+```bash
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }'
+```
+
+**Non-streaming request:**
+```bash
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -d '{ "input": [{ "role": "user", "content": "hi" }] }'
+```
+
+## Run Evaluation
+
+```bash
+uv run agent-evaluate
+```
+
+Uses MLflow scorers (RelevanceToQuery, Safety).
+
+## Run Unit Tests
+
+```bash
+pytest [path]
+```
+
+## Troubleshooting
+
+| Issue | Solution |
+|-------|----------|
+| **Port already in use** | Use `--port 8001` or kill existing process |
+| **Authentication errors** | Verify `.env` is correct; run **quickstart** skill |
+| **Module not found** | Run `uv sync` to install dependencies |
+| **MLflow experiment not found** | Ensure `MLFLOW_TRACKING_URI` in `.env` is `databricks://<profile-name>` |
+
+### MLflow Experiment Not Found
+
+If you see: "The provided MLFLOW_EXPERIMENT_ID environment variable value does not exist"
+
+**Verify the experiment exists:**
+```bash
+databricks -p <profile> experiments get-experiment <experiment_id>
+```
+
+**Fix:** Ensure `.env` has the correct tracking URI format:
+```bash
+MLFLOW_TRACKING_URI="databricks://DEFAULT"  # Include profile name
+```
+
+The quickstart script configures this automatically. If you manually edited `.env`, ensure the profile name is included.
+
+## Next Steps
+
+- Modify your agent: see **modify-langgraph-agent** or **modify-openai-agent** skill (whichever matches your template)
+- Deploy to Databricks: see **deploy** skill
diff --git a/skills/run-locally/agents/openai.yaml b/skills/run-locally/agents/openai.yaml
new file mode 100644
index 0000000..bf31aa0
--- /dev/null
+++ b/skills/run-locally/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Run Locally"
+  short_description: "Run and test the agent locally"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $run-locally for running and testing an agent locally with curl examples."
diff --git a/skills/run-locally/assets/databricks.png b/skills/run-locally/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/run-locally/assets/databricks.png differ
diff --git a/skills/run-locally/assets/databricks.svg b/skills/run-locally/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/run-locally/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/supervisor-api-background-mode/SKILL.md b/skills/supervisor-api-background-mode/SKILL.md
new file mode 100644
index 0000000..6352837
--- /dev/null
+++ b/skills/supervisor-api-background-mode/SKILL.md
@@ -0,0 +1,506 @@
+---
+name: supervisor-api-background-mode
+description: "Enable Supervisor API background mode for long-running agent tasks. Use when: (1) Agent needs to run tasks longer than HTTP timeout limits, (2) User says 'background mode', 'long-running', 'supervisor api', (3) Converting from streaming to background polling pattern, (4) Agent needs resilience to connection drops during execution."
+metadata:
+  version: "0.0.1"
+---
+
+# Supervisor API Background Mode
+
+**Prerequisites:**
+1. Run **quickstart** first (`uv run quickstart`) — it creates the MLflow experiment and `.env` file needed by the server.
+2. Follow the **supervisor-api** skill to set up the Supervisor API with hosted tools and permissions. This skill extends that setup with background mode support.
+
+Background mode submits the request asynchronously (`background=True`), polls for completion, and streams the result back to the frontend. Use this when agent tasks may exceed HTTP timeout limits (complex multi-tool workflows, large data analysis, etc.).
+
+## Before Starting
+
+Use the `AskUserQuestion` tool to ask: "How often should the agent poll for background task completion?" with options:
+- **Every 2 seconds** — Fast response times, good for interactive use
+- **Every 10 seconds** — Balanced between responsiveness and API load
+- **Every 30 seconds** — Lower API load, suitable for very long-running tasks
+
+Use their answer to set `POLL_INTERVAL` in `agent_server/utils.py`.
+
+## Architecture
+
+```
+Chat UI ──POST /api/chat──> Express ──streamText()──> Python @stream()
+                                                        |
+                                                        +-- responses.create(background=True, stream=False)
+                                                        |   (returns response_id immediately)
+                                                        |
+                                                        +-- poll every POLL_INTERVAL (default 2s): responses.retrieve(id)
+                                                        |   skip items with status queued/incomplete/in_progress
+                                                        |   yield completed items
+                                                        |
+                                                        +-- convert items to stream events
+                                                        |   (chunk text into word-based deltas)
+                                                        |
+Chat UI <──SSE stream──── Express <──stream──------+
+```
+
+## What Changes from the Base Supervisor API
+
+| Aspect | Base Supervisor API | Background Mode |
+|--------|-------------------|-----------------|
+| `responses.create()` | `stream=True` or `stream=False` | `background=True, stream=False` |
+| Response | Immediate result or SSE stream | Returns `response_id` immediately |
+| Result retrieval | Direct from response | Poll `responses.retrieve(id)` every `POLL_INTERVAL` seconds (default 2s) |
+| Streaming to frontend | Native SSE from API | Simulated streaming (chunked text deltas) |
+| Client pattern | Single `DatabricksOpenAI` | Single `AsyncDatabricksOpenAI` with `use_ai_gateway=True` |
+| MCP tools | Executed in single request | Multi-turn approval flow (see gotcha #3) |
+| Timeout | HTTP request timeout | No timeout — polls until terminal status |
+
+## Step 1: Add `agent_server/utils.py`
+
+This replaces the base Supervisor API's simple `responses.create()` call with a polling loop and streaming conversion.
+
+Uses a single `AsyncDatabricksOpenAI` client with `use_ai_gateway=True` for both `responses.create()` and `responses.retrieve()`.
+
+```python
+import asyncio
+import logging
+from typing import AsyncGenerator
+from uuid import uuid4
+
+from databricks.sdk import WorkspaceClient
+from databricks_openai import AsyncDatabricksOpenAI
+from mlflow.types.responses import ResponsesAgentRequest, ResponsesAgentStreamEvent
+
+POLL_INTERVAL = 2.0  # seconds between polls
+INITIAL_POLL_DELAY = 1.0  # delay before first poll
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def get_session_id(request: ResponsesAgentRequest) -> str | None:
+    if request.context and request.context.conversation_id:
+        return request.context.conversation_id
+    if request.custom_inputs and isinstance(request.custom_inputs, dict):
+        return request.custom_inputs.get("session_id")
+    return None
+
+
+def create_supervisor_client(
+    workspace_client: WorkspaceClient | None = None,
+) -> AsyncDatabricksOpenAI:
+    """
+    Create an AsyncDatabricksOpenAI client routed through AI Gateway.
+    use_ai_gateway=True automatically resolves the correct AI Gateway endpoint.
+    """
+    workspace_client = workspace_client or WorkspaceClient()
+    client = AsyncDatabricksOpenAI(
+        workspace_client=workspace_client,
+        use_ai_gateway=True,
+    )
+    return client
+
+
+def _count_history_items(request: ResponsesAgentRequest) -> int:
+    """Count input items that will be echoed in the response output.
+
+    Background mode returns ALL output items for the full conversation — including
+    items echoed from the input history. We skip these so we only yield new items.
+    """
+    ECHOED_TYPES = {"function_call", "function_call_output"}
+    count = 0
+    for item in request.input:
+        item_dict = item.model_dump() if hasattr(item, "model_dump") else item
+        role = item_dict.get("role")
+        item_type = item_dict.get("type")
+        if role == "assistant" or item_type in ECHOED_TYPES:
+            count += 1
+    return count
+
+
+async def poll_background_response(
+    client: AsyncDatabricksOpenAI,
+    response,
+    request: ResponsesAgentRequest | None = None,
+) -> AsyncGenerator[dict, None]:
+    """
+    Poll a background response until terminal status (completed/failed/cancelled).
+    Yields new output items as they appear, including mcp_approval_request items
+    so the frontend can show them to the user for approval.
+
+    When MCP tools are involved, the response will complete with mcp_approval_request
+    items. These are yielded to the frontend. The user approves in the UI, and the
+    frontend sends a new request with the approval in the input — that new request
+    goes through background mode again naturally.
+
+    Args:
+        request: The original agent request. Used to calculate how many echoed
+            history items to skip at the start of the output (background mode
+            echoes back all previous assistant messages and tool calls).
+    """
+    skip_items = _count_history_items(request) if request else 0
+    seen_item_count = skip_items
+    if skip_items > 0:
+        logger.info(f"[poll] Skipping first {skip_items} echoed history items")
+    poll_count = 0
+    response_id = response.id
+
+    logger.info(
+        f"[poll] Starting polling for response_id={response_id}, "
+        f"interval={POLL_INTERVAL}s"
+    )
+
+    # Check if the initial response is already terminal (skip polling entirely)
+    if response.status not in ("queued", "in_progress"):
+        logger.info(
+            f"[poll] Initial response already terminal: "
+            f"status={response.status}"
+        )
+        for item in (response.output or [])[skip_items:]:
+            item_dict = (
+                item.model_dump() if hasattr(item, "model_dump") else item
+            )
+            item_status = item_dict.get("status", "")
+            if item_status in ("queued", "incomplete", "in_progress"):
+                continue
+            yield item_dict
+        return
+
+    await asyncio.sleep(INITIAL_POLL_DELAY)
+
+    while True:
+        poll_count += 1
+        logger.info(f"[poll] Poll #{poll_count} for response_id={response_id}")
+
+        try:
+            response = await client.responses.retrieve(response_id)
+        except Exception as e:
+            logger.warning(f"[poll] Retrieve failed (will retry): {e}")
+            await asyncio.sleep(POLL_INTERVAL)
+            continue
+
+        status = response.status
+
+        current_items = response.output or []
+        new_items = len(current_items) - seen_item_count
+        logger.info(
+            f"[poll] status={status}, total_items={len(current_items)}, "
+            f"new_items={new_items}"
+        )
+
+        if new_items > 0:
+            for idx, item in enumerate(current_items[seen_item_count:]):
+                item_dict = (
+                    item.model_dump() if hasattr(item, "model_dump") else item
+                )
+                item_status = item_dict.get("status", "")
+                item_id = item_dict.get("id")
+                item_type = item_dict.get("type")
+                # Stop at incomplete items to preserve ordering
+                if item_status in ("queued", "incomplete", "in_progress"):
+                    logger.info(
+                        f"[poll] Stopping at incomplete item: "
+                        f"type={item_type}, status={item_status}"
+                    )
+                    break
+                if not item_id and item_type != "function_call_output":
+                    logger.info(
+                        f"[poll] Stopping at item with no id: "
+                        f"type={item_type}, status={item_status}"
+                    )
+                    break
+                logger.info(
+                    f"[poll] Yielding item: type={item_type}, "
+                    f"id={item_id}"
+                )
+                yield item_dict
+                seen_item_count += 1
+            else:
+                # Loop completed without break — all items processed
+                seen_item_count = len(current_items)
+
+        if status == "completed":
+            logger.info(
+                f"[poll] Response completed after {poll_count} polls, "
+                f"{seen_item_count} total items"
+            )
+            return
+        elif status in ("failed", "cancelled"):
+            error_msg = (
+                getattr(response, "error", None)
+                or f"Background response {status}"
+            )
+            logger.error(f"[poll] Response {status}: {error_msg}")
+            raise RuntimeError(f"Background response {status}: {error_msg}")
+
+        logger.info(f"[poll] Waiting {POLL_INTERVAL}s before next poll...")
+        await asyncio.sleep(POLL_INTERVAL)
+
+
+def _chunk_text(text: str, chunk_size: int = 1) -> list[str]:
+    """Split text into word-based chunks for streaming."""
+    words = text.split(" ")
+    chunks = []
+    for i in range(0, len(words), chunk_size):
+        chunk = " ".join(words[i : i + chunk_size])
+        if i + chunk_size < len(words):
+            chunk += " "
+        chunks.append(chunk)
+    return chunks
+
+
+def output_item_to_stream_events(
+    item: dict,
+) -> list[ResponsesAgentStreamEvent]:
+    """Convert a Responses API output item to stream events.
+
+    For message items, text is chunked into small word-based deltas to simulate
+    streaming on the frontend, since background mode returns the full text at once.
+    """
+    events = []
+    item_type = item.get("type")
+    item_id = item.get("id", str(uuid4()))
+
+    if item_type == "message":
+        seq = 0
+        for content_part in item.get("content", []):
+            if content_part.get("type") == "output_text":
+                text = content_part.get("text", "")
+                for chunk in _chunk_text(text):
+                    events.append(
+                        ResponsesAgentStreamEvent(
+                            type="response.output_text.delta",
+                            item_id=item_id,
+                            content_index=seq,
+                            delta=chunk,
+                        )
+                    )
+                    seq += 1
+        events.append(
+            ResponsesAgentStreamEvent(
+                type="response.output_item.done",
+                item=item,
+            )
+        )
+    else:
+        # function_call, function_call_output, mcp_approval_request, or other types
+        events.append(
+            ResponsesAgentStreamEvent(
+                type="response.output_item.done",
+                item=item,
+            )
+        )
+
+    return events
+```
+
+## Step 2: Update `agent_server/agent.py`
+
+Replace the base Supervisor API handlers with async background mode handlers. The key differences from the base skill:
+- Use `async` handlers (required for polling)
+- Pass `background=True, stream=False` to `responses.create()`
+- Poll with `poll_background_response()` instead of reading the response directly
+- Convert output items to stream events with `output_item_to_stream_events()`
+- Pass `request` to `poll_background_response()` so it can skip echoed history items in multi-turn conversations
+
+Include your `TOOLS` list from the **supervisor-api** skill's Step 2 if you have hosted tools.
+
+```python
+import asyncio
+import logging
+from typing import AsyncGenerator
+
+import mlflow
+from databricks.sdk import WorkspaceClient
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+    ResponsesAgentStreamEvent,
+)
+
+from agent_server.utils import (
+    create_supervisor_client,
+    get_session_id,
+    output_item_to_stream_events,
+    poll_background_response,
+)
+
+mlflow.openai.autolog()
+logging.getLogger("mlflow.utils.autologging_utils").setLevel(logging.ERROR)
+logging.basicConfig(level=logging.INFO)
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+MODEL = "databricks-claude-sonnet-4"
+SYSTEM_INSTRUCTIONS = "You are a helpful assistant."
+TOOLS = [...]  # Your hosted tools from supervisor-api Step 2
+
+
+def build_input(request: ResponsesAgentRequest) -> list[dict]:
+    return [i.model_dump() for i in request.input]
+
+
+@invoke()
+async def invoke_handler(
+    request: ResponsesAgentRequest,
+) -> ResponsesAgentResponse:
+    if session_id := get_session_id(request):
+        mlflow.update_current_trace(
+            metadata={"mlflow.trace.session": session_id}
+        )
+
+    workspace_client = WorkspaceClient()
+    client = create_supervisor_client(workspace_client)
+
+    logger.info(f"[invoke] Submitting background request with model={MODEL}")
+    response = await client.responses.create(
+        model=MODEL,
+        instructions=SYSTEM_INSTRUCTIONS,
+        input=build_input(request),
+        tools=TOOLS,
+        background=True,
+        stream=False,
+    )
+    logger.info(
+        f"[invoke] Background request submitted: "
+        f"id={response.id}, status={response.status}"
+    )
+
+    # Poll until complete. If an MCP approval request is needed, the next
+    # user message should contain the approval to continue the tool call.
+    output_items = []
+    async for item in poll_background_response(client, response, request):
+        logger.info(
+            f"[invoke] Received output item: "
+            f"type={item.get('type')}, id={item.get('id')}"
+        )
+        output_items.append(item)
+
+    logger.info(f"[invoke] Complete: {len(output_items)} output items")
+    return ResponsesAgentResponse(output=output_items)
+
+
+@stream()
+async def stream_handler(
+    request: ResponsesAgentRequest,
+) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    if session_id := get_session_id(request):
+        mlflow.update_current_trace(
+            metadata={"mlflow.trace.session": session_id}
+        )
+
+    workspace_client = WorkspaceClient()
+    client = create_supervisor_client(workspace_client)
+
+    logger.info(f"[stream] Submitting background request with model={MODEL}")
+    response = await client.responses.create(
+        model=MODEL,
+        instructions=SYSTEM_INSTRUCTIONS,
+        input=build_input(request),
+        tools=TOOLS,
+        background=True,
+        stream=False,
+    )
+    logger.info(
+        f"[stream] Background request submitted: "
+        f"id={response.id}, status={response.status}"
+    )
+
+    # Poll and yield stream events as new output items appear.
+    # If an MCP approval request is needed, the next user message should
+    # contain the approval to continue the tool call.
+    async for item in poll_background_response(client, response, request):
+        events = output_item_to_stream_events(item)
+        logger.info(
+            f"[stream] Received item type={item.get('type')}, "
+            f"emitting {len(events)} stream events"
+        )
+        for event in events:
+            yield event
+            await asyncio.sleep(0.01)  # Small delay for visible streaming effect
+    logger.info("[stream] Complete")
+```
+
+## Key Gotchas
+
+### 1. Incomplete items during `in_progress`
+
+While the response status is `in_progress`, the Supervisor API may return output items that are not yet complete (their `status` field will be `queued`, `incomplete`, or `in_progress`). These partial items may have `id: None` and will cause Pydantic validation errors in `ResponsesAgentStreamEvent` and `ResponsesAgentResponse`. **Always `break` at the first incomplete item** to preserve ordering — items after an incomplete one may also be incomplete or out of order. They'll appear as completed on a later poll.
+
+### 2. Simulated streaming for the frontend
+
+The chat frontend expects SSE streaming events. Since background mode returns the full text at once, `output_item_to_stream_events()` chunks text into 1-word deltas and the stream handler adds a 10ms delay between yields to simulate a realistic streaming experience.
+
+### 3. MCP server tools require a multi-turn approval flow
+
+MCP server tools (`uc_connection` or `app`) require a multi-turn approval flow — see the **supervisor-api** skill for the full explanation and example input.
+
+In background mode, when an MCP tool call requires approval, the response reaches `completed` status (not `in_progress`) with `mcp_approval_request` items in the output. This naturally ends the polling loop. The `mcp_approval_request` items are returned to the frontend for the user to approve.
+
+The approval follow-up is itself a full background mode cycle: the frontend sends a new request (with the original input + `mcp_approval_request` + `mcp_approval_response` appended) using `background=True`, receives a new response ID, and polls again until the final `completed` response with the tool result and assistant message.
+
+### 4. No timeout on polling
+
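+The polling loop runs indefinitely until a terminal status (`completed`, `failed`, `cancelled`). There is no client-side max poll time — this is intentional for long-running background tasks. Note that the Supervisor API itself limits background mode requests to a maximum execution time of 30 minutes (see the **supervisor-api** skill). The frontend chat proxy also has no explicit timeout enforced in code.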
+
+## Testing
+
+### Test background mode directly against the Supervisor API
+
+```bash
+# Get auth
+export DATABRICKS_HOST=$(databricks auth env --profile <PROFILE> | grep DATABRICKS_HOST | cut -d= -f2)
+export DATABRICKS_TOKEN=$(databricks auth env --profile <PROFILE> | grep DATABRICKS_TOKEN | cut -d= -f2)
+
+# Submit background request with a Genie space tool
+curl -s "${DATABRICKS_HOST}/ai-gateway/mlflow/v1/responses" \
+  -H "Authorization: Bearer ${DATABRICKS_TOKEN}" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "<MODEL>",
+    "input": [{"role": "user", "content": "What were the top 5 products by revenue last quarter?"}],
+    "tools": [
+      {
+        "type": "genie_space",
+        "genie_space": {
+          "description": "Query sales and revenue data",
+          "id": "<genie-space-id>"
+        }
+      }
+    ],
+    "background": true,
+    "stream": false
+  }'
+
+# Poll (use the id from above)
+curl -s "${DATABRICKS_HOST}/ai-gateway/mlflow/v1/responses/<RESPONSE_ID>" \
+  -H "Authorization: Bearer ${DATABRICKS_TOKEN}"
+```
+
+### Test locally via the agent server
+
+```bash
+uv run start-app --no-ui
+
+# In another terminal:
+curl -X POST http://localhost:8000/invocations \
+  -H "Content-Type: application/json" \
+  -H "x-forwarded-access-token: <YOUR_TOKEN>" \
+  -d '{
+    "input": [{"role": "user", "content": [{"type": "input_text", "text": "Hello"}]}]
+  }'
+```
+
+### Example expected log output (streaming request)
+
+```
+INFO:agent_server.agent:[stream] Submitting background request with model=databricks-claude-sonnet-4
+INFO:agent_server.agent:[stream] Background request submitted: id=resp_xxx, status=queued
+INFO:agent_server.utils:[poll] Starting polling for response_id=resp_xxx, interval=2.0s
+INFO:agent_server.utils:[poll] Poll #1: status=in_progress, total_items=0, new_items=0
+INFO:agent_server.utils:[poll] Waiting 2.0s before next poll...
+INFO:agent_server.utils:[poll] Poll #2: status=completed, total_items=1, new_items=1
+INFO:agent_server.utils:[poll] Yielding item: type=message, id=msg_xxx
+INFO:agent_server.utils:[poll] Response completed after 2 polls, 1 total items
+INFO:agent_server.agent:[stream] Received item type=message, emitting N stream events
+INFO:agent_server.agent:[stream] Complete
+```
diff --git a/skills/supervisor-api-background-mode/agents/openai.yaml b/skills/supervisor-api-background-mode/agents/openai.yaml
new file mode 100644
index 0000000..0d8525a
--- /dev/null
+++ b/skills/supervisor-api-background-mode/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Supervisor API (Background)"
+  short_description: "Long-running tasks via Supervisor API"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $supervisor-api-background-mode for running long-lived agent tasks via Supervisor API background mode."
diff --git a/skills/supervisor-api-background-mode/assets/databricks.png b/skills/supervisor-api-background-mode/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/supervisor-api-background-mode/assets/databricks.png differ
diff --git a/skills/supervisor-api-background-mode/assets/databricks.svg b/skills/supervisor-api-background-mode/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/supervisor-api-background-mode/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file
diff --git a/skills/supervisor-api/SKILL.md b/skills/supervisor-api/SKILL.md
new file mode 100644
index 0000000..11533a8
--- /dev/null
+++ b/skills/supervisor-api/SKILL.md
@@ -0,0 +1,353 @@
+---
+name: supervisor-api
+description: "Replace the client-side agent loop with Databricks Supervisor API (hosted tools). Use when: (1) User asks about Supervisor API, (2) User wants Databricks to run the agent loop server-side, (3) Connecting Genie spaces, UC functions, agent endpoints, or MCP servers as hosted tools."
+metadata:
+  version: "0.0.1"
+---
+
+# Use the Databricks Supervisor API
+
+The Supervisor API lets Databricks run the tool-selection and synthesis loop server-side. Instead of your agent managing tool calls and looping, you declare hosted tools and call `responses.create()` — Databricks handles the rest.
+
+## When to Use
+
+Use the Supervisor API when you want to:
+- Connect Genie spaces, UC functions, Knowledge Assistants, or MCP servers without managing the agent loop yourself
+- Choose models at runtime and control which tools are used per request
+- Offload tool orchestration to Databricks while iterating on your agent
+
+**Limitations:**
+- Cannot mix hosted tools with client-side function tools in the same request
+- Inference parameters (e.g., `temperature`, `top_p`) are not supported when tools are passed
+- Scoped token access (OBO) is not supported — tools run as the app's service principal; grant tool permissions in `databricks.yml`
+- `stream` and `background` cannot both be `true` in the same request
+- Background mode requests have a maximum execution time of 30 minutes
+
+## Step 1: Install `databricks-openai`
+
+Add to `pyproject.toml` if not already present:
+
+```toml
+[project]
+dependencies = [
+    ...
+    "databricks-openai>=0.14.0",
+    "databricks-sdk>=0.55.0",
+]
+```
+
+Then run `uv sync`.
+
+## Step 2: Declare Hosted Tools
+
+Define your tools as a list of dicts. Run `uv run discover-tools` to find available resources in your workspace.
+
+```python
+TOOLS = [
+    # Genie space — natural language queries over structured data
+    {
+        "type": "genie_space",
+        "genie_space": {
+            "id": "<genie-space-id>",
+            "description": "Query sales data using natural language",
+        },
+    },
+    # UC function — SQL or Python UDF
+    {
+        "type": "uc_function",
+        "uc_function": {
+            "name": "<catalog>.<schema>.<function_name>",
+            "description": "Executes a custom UC function",
+        },
+    },
+    # Knowledge Assistant agent endpoint
+    {
+        "type": "knowledge_assistant",
+        "knowledge_assistant": {
+            "knowledge_assistant_id": "<ka-id>",
+            "description": "Answers questions from internal documentation",
+        },
+    },
+    # External MCP server via Unity Catalog connection
+    {
+        "type": "uc_connection",
+        "uc_connection": {
+            "name": "<uc-connection-name>",
+            "description": "Searches the web for current information",
+        },
+    },
+    # Databricks App endpoint or custom MCP server running as a Databricks App
+    {
+        "type": "app",
+        "app": {
+            "name": "<databricks-app-name>",
+            "description": "Custom application or MCP server endpoint",
+        },
+    },
+]
+```
+
+## Step 3: Update `agent_server/agent.py`
+
+Replace your existing invoke/stream handlers with the Supervisor API pattern. Remove any MCP client setup, LangGraph agents, or OpenAI Agents SDK runner code — the Supervisor API replaces the client-side loop entirely.
+
+`use_ai_gateway=True` automatically resolves the correct AI Gateway endpoint for the workspace.
+
+Tools run as the app's service principal — grant each tool's resource permissions in `databricks.yml` (Step 4).
+
+```python
+import os
+import logging
+import mlflow
+from databricks.sdk import WorkspaceClient
+from databricks_openai import DatabricksOpenAI
+from mlflow import MlflowClient
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.tracing import get_tracing_context_headers_for_http_request
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+)
+
+mlflow.openai.autolog()
+
+logger = logging.getLogger(__name__)
+
+MODEL = "databricks-claude-sonnet-4-5"
+TOOLS = [...]  # From Step 2
+
+# Resolve and cache the AI Gateway client once at module load
+_wc = WorkspaceClient()
+_client = DatabricksOpenAI(workspace_client=_wc, use_ai_gateway=True)
+
+
+def _get_trace_destination() -> dict:
+    experiment_id = os.environ.get("MLFLOW_EXPERIMENT_ID")
+    if not experiment_id:
+        raise RuntimeError(
+            "MLFLOW_EXPERIMENT_ID is not set. Cannot configure distributed tracing."
+        )
+    trace_location = MlflowClient().get_experiment(experiment_id).trace_location
+    if trace_location is None or not hasattr(trace_location, "catalog_name"):
+        msg = (
+            f"Experiment {experiment_id} trace_location is not a Unity Catalog location "
+            f"(got: {type(trace_location).__name__ if trace_location else None}). "
+            "Distributed tracing requires UC-backed traces. "
+            "Ensure 'MLflow traces in Unity Catalog' is enabled for your workspace and that "
+            "the target UC tables use customer-managed storage (Arclight default storage is not supported)."
+        )
+        logger.error(msg)
+        raise RuntimeError(msg)
+    dest = {
+        "catalog_name": trace_location.catalog_name,
+        "schema_name": trace_location.schema_name,
+    }
+    if trace_location.table_prefix is not None:
+        dest["table_prefix"] = trace_location.table_prefix
+    return dest
+
+
+_TRACE_DESTINATION = _get_trace_destination()
+
+
+@invoke()
+def invoke_handler(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    mlflow.update_current_trace(
+        metadata={"mlflow.trace.session": request.context.conversation_id}
+    )
+    response = _client.responses.create(
+        model=MODEL,
+        input=[i.model_dump() for i in request.input],
+        tools=TOOLS,
+        stream=False,
+        extra_headers=get_tracing_context_headers_for_http_request(),
+        extra_body={
+            "trace_destination": _TRACE_DESTINATION,
+        },
+    )
+    return ResponsesAgentResponse(output=[item.model_dump() for item in response.output])
+
+
+@stream()
+def stream_handler(request: ResponsesAgentRequest):
+    mlflow.update_current_trace(
+        metadata={"mlflow.trace.session": request.context.conversation_id}
+    )
+    return _client.responses.create(
+        model=MODEL,
+        input=[i.model_dump() for i in request.input],
+        tools=TOOLS,
+        stream=True,
+        extra_headers=get_tracing_context_headers_for_http_request(),
+        extra_body={
+            "trace_destination": _TRACE_DESTINATION,
+        },
+    )
+```
+
+## Step 4: Grant Permissions in `databricks.yml`
+
+Grant the service principal access to each hosted tool. See the **add-tools** skill for YAML examples — the resource types are the same. The model serving endpoint is always required.
+
+| Tool type | `resources` entry | Permission |
+|-----------|-------------------|------------|
+| *(all)* | `serving_endpoint` (model) | `CAN_QUERY` |
+| `genie_space` | `genie_space` | `CAN_RUN` |
+| `uc_function` | `uc_securable` (`FUNCTION`) | `EXECUTE` |
+| `knowledge_assistant` | `serving_endpoint` | `CAN_QUERY` |
+| `uc_connection` | `uc_securable` (`CONNECTION`) | `USE_CONNECTION` |
+| `app` | `app` *(CLI support coming soon)* | `CAN_USE` |
+
+## Step 5: Test and Deploy
+
+```bash
+uv run start-app       # Test locally
+databricks bundle deploy && databricks bundle run {{BUNDLE_NAME}}  # Deploy
+```
+
+## Supported Models
+
+Pass any of these as the `model` parameter:
+
+| Model | ID |
+|-------|----|
+| Claude Sonnet 4 | `databricks-claude-sonnet-4` |
+| Claude Sonnet 4.5 | `databricks-claude-sonnet-4-5` |
+| Claude Sonnet 4.6 | `databricks-claude-sonnet-4-6` |
+| Claude Haiku 4.5 | `databricks-claude-haiku-4-5` |
+| Claude Opus 4.1 | `databricks-claude-opus-4-1` |
+| Claude Opus 4.5 | `databricks-claude-opus-4-5` |
+| Claude Opus 4.6 | `databricks-claude-opus-4-6` |
+| GPT-5 | `databricks-gpt-5` |
+
+## Enabling Tracing
+
+The Supervisor API supports **distributed tracing** — spans from the server-side agent loop are linked into the same trace as your agent's client-side spans, giving end-to-end visibility in MLflow. See the [MLflow distributed tracing docs](https://mlflow.org/docs/latest/genai/tracing/app-instrumentation/distributed-tracing/) for more details.
+
+### How It Works
+
+1. The trace destination (UC catalog, schema, and optional table prefix) is resolved from your MLflow experiment at startup
+2. `get_tracing_context_headers_for_http_request()` propagates the active span context to the Supervisor API via HTTP headers
+3. `trace_destination` tells the Supervisor API where to write its server-side spans
+4. Both sets of spans appear as a single connected trace in MLflow
+
+### Setup
+
+Add to `agent_server/agent.py`:
+
+```python
+import os
+import logging
+import mlflow
+from mlflow import MlflowClient
+from mlflow.tracing import get_tracing_context_headers_for_http_request
+
+logger = logging.getLogger(__name__)
+
+
+def _get_trace_destination() -> dict:
+    """Resolve trace destination from the experiment's Unity Catalog trace location."""
+    experiment_id = os.environ.get("MLFLOW_EXPERIMENT_ID")
+    if not experiment_id:
+        raise RuntimeError(
+            "MLFLOW_EXPERIMENT_ID is not set. Cannot configure distributed tracing. "
+            "Ensure the app is configured with a valid MLflow experiment."
+        )
+
+    client = MlflowClient()
+    experiment = client.get_experiment(experiment_id)
+    trace_location = experiment.trace_location
+
+    if trace_location is None or not hasattr(trace_location, "catalog_name"):
+        msg = (
+            f"Experiment {experiment_id} trace_location is not a Unity Catalog location "
+            f"(got: {type(trace_location).__name__ if trace_location else None}). "
+            "Distributed tracing requires UC-backed traces. "
+            "Ensure 'MLflow traces in Unity Catalog' is enabled for your workspace and that "
+            "the target UC tables use customer-managed storage (Arclight default storage is not supported)."
+        )
+        logger.error(msg)
+        raise RuntimeError(msg)
+
+    dest = {
+        "catalog_name": trace_location.catalog_name,
+        "schema_name": trace_location.schema_name,
+    }
+    if trace_location.table_prefix is not None:
+        dest["table_prefix"] = trace_location.table_prefix
+    return dest
+
+
+# Resolve once at module load — fail fast if the experiment is misconfigured
+_TRACE_DESTINATION = _get_trace_destination()
+```
+
+Then pass `extra_headers` and `extra_body` in every `responses.create()` call:
+
+```python
+response = _client.responses.create(
+    model=MODEL,
+    input=[i.model_dump() for i in request.input],
+    tools=TOOLS,
+    stream=False,
+    extra_headers=get_tracing_context_headers_for_http_request(),
+    extra_body={
+        "trace_destination": _TRACE_DESTINATION,
+    },
+)
+```
+
+- `extra_headers` — propagates the active MLflow span context so client and server spans are linked into one trace
+- `trace_destination` — tells the Supervisor API where to write server-side spans in Unity Catalog
+
+### Environment Variable
+
+`MLFLOW_EXPERIMENT_ID` must be set in your app environment. The quickstart script sets this automatically. To verify:
+
+```bash
+grep MLFLOW_EXPERIMENT_ID .env
+```
+
+> **Claude:** Before writing any tracing code, check `.env` for `MLFLOW_EXPERIMENT_ID`. If it is missing or empty, ask the user:
+> *"Distributed tracing requires `MLFLOW_EXPERIMENT_ID` to be set in `.env`. Do you have an MLflow experiment ID? If not, run `uv run quickstart` to create one, or provide the experiment ID and I'll add it to `.env`."*
+>
+> If the value is present but looks wrong (e.g. not a numeric string), warn the user before proceeding.
+
+## MCP Server Tools: Multi-Turn Approval Flow
+
+When using MCP server tools (`uc_connection` or `app`), the Supervisor API does **not** execute the MCP tool call in a single request. Instead, it returns a `completed` response containing `mcp_approval_request` output items. To complete the tool call, your agent must handle a multi-turn flow:
+
+1. **First request** — `responses.create()` → response completes with `mcp_approval_request` items in the output
+2. **Return to frontend** — the `mcp_approval_request` item is returned to the chat UI so the user can approve the tool call
+3. **Second request** — user approves → frontend sends a new request with the original input + `mcp_approval_request` + `mcp_approval_response` (with `approve: true`) appended to the input
+4. **Result** — the second response completes with the actual `function_call_output` (tool result) and the final assistant `message`
+
+No special backend handling is needed — the agent server simply returns all output items (including `mcp_approval_request`) to the frontend. The multi-turn flow is handled naturally through the conversation: each request/response is a separate `responses.create()` call.
+
+**Example input for the follow-up request (step 3):**
+```python
+input = [
+    # Original user message
+    {"type": "message", "role": "user", "content": "Search for Databricks"},
+    # The mcp_approval_request from the first response's output
+    {"type": "mcp_approval_request", "id": "call_xxx", "name": "web-search",
+     "server_label": "you_dot_com", "arguments": '{"query": "Databricks"}'},
+    # The approval
+    {"type": "mcp_approval_response", "id": "call_xxx",
+     "approval_request_id": "call_xxx", "approve": True},
+]
+```
+
+## Troubleshooting
+
+**"Please ensure AI Gateway V2 is enabled"** — AI Gateway must be enabled for the workspace. Contact your Databricks account team.
+
+**"Cannot mix hosted and client-side tools"** — Remove any `function`-type tools (Python callables) from `TOOLS`. All tools must be hosted types (`genie_space`, `uc_function`, `knowledge_assistant`, `uc_connection`, `app`).
+
+**"Parameter not supported when tools are provided"** — Remove `temperature`, `top_p`, or other inference parameters from the `responses.create()` call.
+
+## Background Mode (Long-Running Tasks)
+
+If your agent needs to run long-running tasks that may exceed HTTP timeout limits (e.g., complex multi-tool workflows, large data analysis), you can enable **background mode**. This submits the request asynchronously, polls for completion, and streams the result back to the frontend.
+
+See the **supervisor-api-background-mode** skill for full implementation details.
diff --git a/skills/supervisor-api/agents/openai.yaml b/skills/supervisor-api/agents/openai.yaml
new file mode 100644
index 0000000..042edbc
--- /dev/null
+++ b/skills/supervisor-api/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+  display_name: "Supervisor API"
+  short_description: "Use Databricks Supervisor API for hosted tools"
+  icon_small: "./assets/databricks.svg"
+  icon_large: "./assets/databricks.png"
+  brand_color: "#FF3621"
+  default_prompt: "Use $supervisor-api for using the Databricks Supervisor API to run the agent loop server-side."
diff --git a/skills/supervisor-api/assets/databricks.png b/skills/supervisor-api/assets/databricks.png
new file mode 100644
index 0000000..263fe98
Binary files /dev/null and b/skills/supervisor-api/assets/databricks.png differ
diff --git a/skills/supervisor-api/assets/databricks.svg b/skills/supervisor-api/assets/databricks.svg
new file mode 100644
index 0000000..9d19110
--- /dev/null
+++ b/skills/supervisor-api/assets/databricks.svg
@@ -0,0 +1,3 @@
+<svg width="300" height="331" viewBox="0 0 300 331" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M283.923 136.449L150.144 213.624L6.88995 131.168L0 134.982V194.844L150.144 281.115L283.923 204.234V235.926L150.144 313.1L6.88995 230.644L0 234.458V244.729L150.144 331L300 244.729V184.867L293.11 181.052L150.144 263.215L16.0766 186.334V154.643L150.144 231.524L300 145.253V86.2713L292.536 81.8697L150.144 163.739L22.9665 90.9663L150.144 17.8998L254.641 78.055L263.828 72.773V65.4371L150.144 0L0 86.2713V95.6613L150.144 181.933L283.923 104.758V136.449Z" fill="#FF3621"/>
+</svg>
\ No newline at end of file