diff --git a/manifest.json b/manifest.json index 54ec72f..1d6bebb 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,72 @@ { "version": "2", - "updated_at": "2026-04-30T11:02:41Z", + "updated_at": "2026-05-12T20:26:42Z", "skills": { + "add-tools-langgraph": { + "version": "0.0.1", + "description": "Add tools and permissions to a LangGraph agent (MCP, Genie, vector search, UC functions)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "add-tools-openai": { + "version": "0.0.1", + "description": "Add tools and permissions to an OpenAI Agents SDK agent (MCP, Genie, vector search, UC functions)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "agent-langgraph-memory": { + "version": "0.0.1", + "description": "Add memory capabilities (checkpointing, long-term store) to a LangGraph agent", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "agent-openai-memory": { + "version": "0.0.1", + "description": "Add memory capabilities (sessions) to an OpenAI Agents SDK agent", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "create-tools": { + "version": "0.0.1", + "description": "Create Databricks resources (Genie spaces, vector search indexes, UC functions, MCP servers) for use as agent tools", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps development and 
deployment (evaluates analytics vs synced tables data access)", "experimental": false, - "updated_at": "2026-04-30T11:00:26Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -33,7 +93,7 @@ "version": "0.1.0", "description": "Core Databricks skill for CLI, auth, and data exploration", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -48,7 +108,7 @@ "version": "0.0.0", "description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -66,7 +126,7 @@ "version": "0.1.0", "description": "Databricks Jobs orchestration and scheduling", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -78,7 +138,7 @@ "version": "0.1.0", "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API", "experimental": false, - "updated_at": "2026-04-30T11:02:37Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -93,7 +153,7 @@ "version": "0.1.0", "description": "Databricks Model Serving endpoint management", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -105,7 +165,7 @@ "version": "0.1.0", "description": "Databricks Pipelines (DLT) for ETL and streaming", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -152,7 +212,7 @@ "version": "0.1.0", "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", 
"experimental": false, - "updated_at": "2026-04-24T15:10:23Z", + "updated_at": "2026-05-12T20:25:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -164,6 +224,150 @@ "references/networking-and-security.md", "references/streaming-migration.md" ] + }, + "deploy": { + "version": "0.0.1", + "description": "Deploy an agent to Databricks Apps via Databricks Asset Bundles (DAB)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "discover-tools": { + "version": "0.0.1", + "description": "Discover available tools and resources (MCP servers, Genie spaces, UC functions, vector search) in a Databricks workspace", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "lakebase-setup": { + "version": "0.0.1", + "description": "Configure Lakebase as storage for agent memory (checkpoints, sessions, long-term store)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "load-testing": { + "version": "0.0.1", + "description": "Load test a Databricks App to find its maximum QPS", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "long-running-server": { + "version": "0.0.1", + "description": "Enable long-running background task support in an agent server (LongRunningAgentServer)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "migrate-from-model-serving": { + "version": "0.0.1", + "description": "Migrate an MLflow ResponsesAgent from Databricks 
Model Serving to Databricks Apps", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "modify-langgraph-agent": { + "version": "0.0.1", + "description": "Modify agent code, add tools, or change configuration in a LangGraph template", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "modify-openai-agent": { + "version": "0.0.1", + "description": "Modify agent code, add tools, or change configuration in an OpenAI Agents SDK template", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "quickstart": { + "version": "0.0.1", + "description": "Set up a Databricks agent development environment (authentication, .env, MLflow experiment)", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "run-locally": { + "version": "0.0.1", + "description": "Run and test an agent locally with curl examples and hot-reload", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "supervisor-api": { + "version": "0.0.1", + "description": "Use the Databricks Supervisor API to run the agent loop server-side with hosted tools", + "experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] + }, + "supervisor-api-background-mode": { + "version": "0.0.1", + "description": "Run long-lived agent tasks via Supervisor API background mode (polling pattern)", + 
"experimental": true, + "updated_at": "2026-05-12T20:26:11Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] } } } diff --git a/scripts/skills.py b/scripts/skills.py index cdfdcf7..5bcd00f 100644 --- a/scripts/skills.py +++ b/scripts/skills.py @@ -48,6 +48,75 @@ "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", "experimental": False, }, + # Skills imported from databricks/app-templates (.claude/skills/). ML-63273. + "add-tools-langgraph": { + "description": "Add tools and permissions to a LangGraph agent (MCP, Genie, vector search, UC functions)", + "experimental": True, + }, + "add-tools-openai": { + "description": "Add tools and permissions to an OpenAI Agents SDK agent (MCP, Genie, vector search, UC functions)", + "experimental": True, + }, + "agent-langgraph-memory": { + "description": "Add memory capabilities (checkpointing, long-term store) to a LangGraph agent", + "experimental": True, + }, + "agent-openai-memory": { + "description": "Add memory capabilities (sessions) to an OpenAI Agents SDK agent", + "experimental": True, + }, + "create-tools": { + "description": "Create Databricks resources (Genie spaces, vector search indexes, UC functions, MCP servers) for use as agent tools", + "experimental": True, + }, + "deploy": { + "description": "Deploy an agent to Databricks Apps via Databricks Asset Bundles (DAB)", + "experimental": True, + }, + "discover-tools": { + "description": "Discover available tools and resources (MCP servers, Genie spaces, UC functions, vector search) in a Databricks workspace", + "experimental": True, + }, + "lakebase-setup": { + "description": "Configure Lakebase as storage for agent memory (checkpoints, sessions, long-term store)", + "experimental": True, + }, + "load-testing": { + "description": "Load test a Databricks App to find its maximum QPS", + "experimental": True, + }, + 
"long-running-server": { + "description": "Enable long-running background task support in an agent server (LongRunningAgentServer)", + "experimental": True, + }, + "migrate-from-model-serving": { + "description": "Migrate an MLflow ResponsesAgent from Databricks Model Serving to Databricks Apps", + "experimental": True, + }, + "modify-langgraph-agent": { + "description": "Modify agent code, add tools, or change configuration in a LangGraph template", + "experimental": True, + }, + "modify-openai-agent": { + "description": "Modify agent code, add tools, or change configuration in an OpenAI Agents SDK template", + "experimental": True, + }, + "quickstart": { + "description": "Set up a Databricks agent development environment (authentication, .env, MLflow experiment)", + "experimental": True, + }, + "run-locally": { + "description": "Run and test an agent locally with curl examples and hot-reload", + "experimental": True, + }, + "supervisor-api": { + "description": "Use the Databricks Supervisor API to run the agent loop server-side with hosted tools", + "experimental": True, + }, + "supervisor-api-background-mode": { + "description": "Run long-lived agent tasks via Supervisor API background mode (polling pattern)", + "experimental": True, + }, } diff --git a/skills/add-tools-langgraph/SKILL.md b/skills/add-tools-langgraph/SKILL.md new file mode 100644 index 0000000..f4fcd77 --- /dev/null +++ b/skills/add-tools-langgraph/SKILL.md @@ -0,0 +1,126 @@ +--- +name: add-tools-langgraph +description: "Add tools to your agent and grant required permissions in databricks.yml. Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources." 
+metadata: + version: "0.0.1" +--- + +# Add Tools & Grant Permissions + +> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks <command> --profile <profile>` + +> Don't have the resource yet? See **create-tools** skill first. + +**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.** + +Without this, you'll get permission errors when the agent tries to use the resource. + +## Workflow + +**Step 1:** Add MCP server in `agent_server/agent.py`: +```python +from databricks_langchain import DatabricksMCPServer, DatabricksMultiServerMCPClient + +genie_server = DatabricksMCPServer( + url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef", + name="my genie space", +) + +mcp_client = DatabricksMultiServerMCPClient([genie_server]) +tools = await mcp_client.get_tools() +``` + +**Step 2:** Grant access in `databricks.yml`: +```yaml +resources: + apps: + agent_langgraph: + resources: + - name: 'my_genie_space' + genie_space: + name: 'My Genie Space' + space_id: '01234567-89ab-cdef' + permission: 'CAN_RUN' +``` + +**Step 3:** Deploy and run: +```bash +databricks bundle deploy +databricks bundle run agent_langgraph # Required to start app with new code! +``` + +See **deploy** skill for more details. 
+ +## Resource Type Examples + +See the `examples/` directory for complete YAML snippets: + +| File | Resource Type | When to Use | +|------|--------------|-------------| +| `uc-function.yaml` | Unity Catalog function | UC functions via MCP | +| `uc-connection.yaml` | UC connection | External MCP servers | +| `vector-search.yaml` | Vector search index | RAG applications | +| `sql-warehouse.yaml` | SQL warehouse | SQL execution | +| `serving-endpoint.yaml` | Model serving endpoint | Model inference | +| `genie-space.yaml` | Genie space | Natural language data | +| `lakebase.yaml` | Lakebase database | Agent memory storage (provisioned) | +| `lakebase-autoscaling.yaml` | Lakebase autoscaling postgres | Agent memory storage (autoscaling) | +| `experiment.yaml` | MLflow experiment | Tracing (already configured) | +| `app.yaml` | Databricks App (app-to-app) | Custom MCP servers hosted as Apps | +| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` | + +## Custom MCP Servers (Databricks Apps) + +Declare the target app as an `app` resource in `databricks.yml` — the bundle grants `CAN_USE` on deploy. Requires Databricks CLI **v0.298.0+**. + +```yaml +resources: + apps: + agent_langgraph: + resources: + - name: 'mcp_server' + app: + name: 'mcp-my-server' + permission: CAN_USE +``` + +See `examples/custom-mcp-server.md` for the full flow (agent code + YAML + deploy). + +## value_from Pattern + +**IMPORTANT**: Make sure all `value_from` references in `databricks.yml` `config.env` reference an existing key in the `databricks.yml` `resources` list. +Some resources need environment variables in your app. 
Use `value_from` in `databricks.yml` `config.env` to reference resources defined in `databricks.yml`: + +```yaml +# In databricks.yml, under resources.apps.<app_key>.config.env: +env: + - name: MLFLOW_EXPERIMENT_ID + value_from: "experiment" # References resources.apps.<app_key>.resources[name='experiment'] + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" # References resources.apps.<app_key>.resources[name='database'] +``` + +**Critical:** Every `value_from` value must match a `name` field in `databricks.yml` resources. + +## MCP Error Handling + +MCP tool calls can fail (network issues, permission errors, timeouts). Use `handle_tool_error` on MCP servers to catch errors and return them to the LLM instead of crashing the agent: + +```python +DatabricksMCPServer( + name="genie", + url=f"{host}/api/2.0/mcp/genie/{space_id}", + handle_tool_error=True, # Return error messages to LLM instead of raising + timeout=60.0, # Increase timeout for slow tools like Genie +) +``` + +For local function tools defined with `@tool`, see `create-tools` skill > `examples/local-python-tools.md` for the `ToolException` + `handle_tool_error` pattern. 
+ +## Important Notes + +- **MLflow experiment**: Already configured in template, no action needed +- **Multiple resources**: Add multiple entries under `resources:` list +- **Permission types vary**: Each resource type has specific permission values +- **Deploy + Run after changes**: Run both `databricks bundle deploy` AND `databricks bundle run {{BUNDLE_NAME}}` +- **value_from matching**: Ensure `config.env` `value_from` values match `databricks.yml` resource `name` values diff --git a/skills/add-tools-langgraph/agents/openai.yaml b/skills/add-tools-langgraph/agents/openai.yaml new file mode 100644 index 0000000..f194c24 --- /dev/null +++ b/skills/add-tools-langgraph/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Add Tools (LangGraph)" + short_description: "Add tools and permissions for LangGraph agents" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $add-tools-langgraph for adding tools and permissions to a LangGraph agent." diff --git a/skills/add-tools-langgraph/assets/databricks.png b/skills/add-tools-langgraph/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/add-tools-langgraph/assets/databricks.png differ diff --git a/skills/add-tools-langgraph/assets/databricks.svg b/skills/add-tools-langgraph/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/add-tools-langgraph/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/add-tools-openai/SKILL.md b/skills/add-tools-openai/SKILL.md new file mode 100644 index 0000000..c43eeb3 --- /dev/null +++ b/skills/add-tools-openai/SKILL.md @@ -0,0 +1,104 @@ +--- +name: add-tools-openai +description: "Add tools to your agent and grant required permissions in databricks.yml. 
Use when: (1) Adding MCP servers, Genie spaces, vector search, or UC functions to agent, (2) Permission errors at runtime, (3) User says 'add tool', 'connect to', 'grant permission', (4) Configuring databricks.yml resources." +metadata: + version: "0.0.1" +--- + +# Add Tools & Grant Permissions + +> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks <command> --profile <profile>` + +> Don't have the resource yet? See **create-tools** skill first. + +**After adding any MCP server to your agent, you MUST grant the app access in `databricks.yml`.** + +Without this, you'll get permission errors when the agent tries to use the resource. + +## Workflow + +**Step 1:** Add MCP server in `agent_server/agent.py`: +```python +from databricks_openai.agents import McpServer + +genie_server = McpServer( + url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef", + name="my genie space", +) + +agent = Agent( + name="my agent", + model="databricks-claude-3-7-sonnet", + mcp_servers=[genie_server], +) +``` + +**Step 2:** Grant access in `databricks.yml`: +```yaml +resources: + apps: + {{BUNDLE_NAME}}: + resources: + - name: 'my_genie_space' + genie_space: + name: 'My Genie Space' + space_id: '01234567-89ab-cdef' + permission: 'CAN_RUN' +``` + +**Step 3:** Deploy with `databricks bundle deploy` (see **deploy** skill) + +## Resource Type Examples + +See the `examples/` directory for complete YAML snippets: + +| File | Resource Type | When to Use | +|------|--------------|-------------| +| `uc-function.yaml` | Unity Catalog function | UC functions | +| `uc-connection.yaml` | UC connection | External MCP servers | +| `vector-search.yaml` | Vector search index | RAG applications | +| `sql-warehouse.yaml` | SQL warehouse | SQL execution | +| `serving-endpoint.yaml` | Model serving endpoint | Model inference | +| `genie-space.yaml` | Genie space | Natural language data | +| `lakebase-autoscaling.yaml` | Lakebase autoscaling postgres | Agent memory storage 
(autoscaling) | +| `experiment.yaml` | MLflow experiment | Tracing (already configured) | +| `app.yaml` | Databricks App (app-to-app) | Custom MCP servers hosted as Apps | +| `custom-mcp-server.md` | Custom MCP apps | Apps starting with `mcp-*` | + +## Custom MCP Servers (Databricks Apps) + +Declare the target app as an `app` resource in `databricks.yml` — the bundle grants `CAN_USE` on deploy. Requires Databricks CLI **v0.298.0+**. + +```yaml +resources: + apps: + {{BUNDLE_NAME}}: + resources: + - name: 'mcp_server' + app: + name: 'mcp-my-server' + permission: CAN_USE +``` + +See `examples/custom-mcp-server.md` for the full flow (agent code + YAML + deploy). + +## MCP Error Handling + +MCP tool calls can fail (network issues, permission errors, timeouts). The OpenAI Agents SDK catches tool errors by default and returns the error message to the LLM. To customize timeout behavior for MCP servers: + +```python +mcp_server = McpServer( + url=f"{host}/api/2.0/mcp/genie/{space_id}", + name="genie", + timeout=60.0, # Increase timeout for slow tools like Genie (default: 20s) +) +``` + +For local function tools, see `create-tools` skill > `examples/local-python-tools.md` for `failure_error_function` patterns. 
+ +## Important Notes + +- **MLflow experiment**: Already configured in template, no action needed +- **Multiple resources**: Add multiple entries under `resources:` list +- **Permission types vary**: Each resource type has specific permission values +- **Deploy after changes**: Run `databricks bundle deploy` after modifying `databricks.yml` diff --git a/skills/add-tools-openai/agents/openai.yaml b/skills/add-tools-openai/agents/openai.yaml new file mode 100644 index 0000000..6153080 --- /dev/null +++ b/skills/add-tools-openai/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Add Tools (OpenAI)" + short_description: "Add tools and permissions for OpenAI agents" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $add-tools-openai for adding tools and permissions to an OpenAI Agents SDK agent." diff --git a/skills/add-tools-openai/assets/databricks.png b/skills/add-tools-openai/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/add-tools-openai/assets/databricks.png differ diff --git a/skills/add-tools-openai/assets/databricks.svg b/skills/add-tools-openai/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/add-tools-openai/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/agent-langgraph-memory/SKILL.md b/skills/agent-langgraph-memory/SKILL.md new file mode 100644 index 0000000..46d2e58 --- /dev/null +++ b/skills/agent-langgraph-memory/SKILL.md @@ -0,0 +1,387 @@ +--- +name: agent-langgraph-memory +description: "Add memory capabilities to your agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage." 
+metadata: + version: "0.0.1" +--- + +# Adding Memory to Your Agent + +> **Note:** This template does not include memory by default. Use this skill to **add memory capabilities**. For a pre-configured memory template, see: +> - [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) - Short-term and long-term memory with long-running background tasks + +## Memory Types + +| Type | Use Case | Storage | Identifier | +|------|----------|---------|------------| +| **Short-term** | Conversation history within a session | `AsyncCheckpointSaver` | `thread_id` | +| **Long-term** | User facts that persist across sessions | `AsyncDatabricksStore` | `user_id` | + +## Prerequisites + +1. **Add memory dependency** to `pyproject.toml`: + ```toml + dependencies = [ + "databricks-langchain[memory]", + ] + ``` + + Then run `uv sync` + +2. **Configure Lakebase** - See **lakebase-setup** skill for: + - Creating/configuring Lakebase instance + - Initializing tables (CRITICAL first-time step) + +--- + +## Quick Setup Summary + +Adding memory requires changes to **4 files**: + +| File | What to Add | +|------|-------------| +| `pyproject.toml` | Memory dependency | +| `.env` | Lakebase env vars (for local dev) | +| `databricks.yml` | Lakebase database resource + env vars in config block | +| `agent_server/agent.py` | Memory tools and AsyncDatabricksStore | + +--- + +## Key Principles + +Before implementing memory, understand these patterns from the production implementation. + +### 1. Factory Function Pattern + +Memory tools should be returned from a factory function, not defined as standalone functions: + +```python +def memory_tools(): + @tool + async def get_user_memory(query: str, config: RunnableConfig) -> str: + ... + @tool + async def save_user_memory(memory_key: str, memory_data_json: str, config: RunnableConfig) -> str: + ... + @tool + async def delete_user_memory(memory_key: str, config: RunnableConfig) -> str: + ... 
+ return [get_user_memory, save_user_memory, delete_user_memory] +``` + +### 2. User ID Extraction + +Extract `user_id` from the request, checking `custom_inputs` first. Return `None` (not a default) to let the caller decide: + +```python +def get_user_id(request: ResponsesAgentRequest) -> Optional[str]: + custom_inputs = dict(request.custom_inputs or {}) + if "user_id" in custom_inputs: + return custom_inputs["user_id"] + if request.context and getattr(request.context, "user_id", None): + return request.context.user_id + return None +``` + +### 3. Separate Error Handling + +Check `user_id` and `store` separately with distinct error messages: + +```python +user_id = config.get("configurable", {}).get("user_id") +if not user_id: + return "Memory not available - no user_id provided." + +store: Optional[BaseStore] = config.get("configurable", {}).get("store") +if not store: + return "Memory not available - store not configured." +``` + +### 4. JSON Validation for Save + +Validate JSON input before storing - the LLM may pass invalid JSON: + +```python +try: + memory_data = json.loads(memory_data_json) + if not isinstance(memory_data, dict): + return f"Failed: memory_data must be a JSON object, not {type(memory_data).__name__}" + await store.aput(namespace, memory_key, memory_data) +except json.JSONDecodeError as e: + return f"Failed to save memory: Invalid JSON - {e}" +``` + +### 5. Pass Store via RunnableConfig + +Pass the store through config, not as a function parameter: + +```python +config = {"configurable": {"user_id": user_id, "store": store}} +# Tools access via: config.get("configurable", {}).get("store") +``` + +--- + +## Complete Example + +A full implementation is available in this skill's examples folder: + +```bash +# Copy to your project +cp .claude/skills/agent-memory/examples/memory_tools.py agent_server/ +``` + +See `examples/memory_tools.py` for production-ready code including all helper functions. 
+ +## Production Reference + +For implementations in the pre-built templates: + +| File | Description | +|------|-------------| +| [`agent-langgraph-advanced/agent_server/utils_memory.py`](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced/agent_server/utils_memory.py) | Memory tools factory, helpers, error handling | +| [`agent-langgraph-advanced/agent_server/agent.py`](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced/agent_server/agent.py) | Integration with agent, store initialization | + +Key functions: +- `memory_tools()` - Factory returning get/save/delete tools +- `get_user_id()` - Extract user_id from request +- `resolve_lakebase_instance_name()` - Handle hostname vs instance name +- `get_lakebase_access_error_message()` - Helpful error messages + +--- + +## Configuration Files + +### Step 1: databricks.yml (Lakebase Resource) + +Add the Lakebase database resource to your app: + +```yaml +resources: + apps: + agent_langgraph: + name: "your-app-name" + source_code_path: ./ + + resources: + # ... other resources (experiment, UC functions, etc.) ... + + # Lakebase instance for long-term memory + - name: 'database' + database: + instance_name: '<your-lakebase-instance-name>' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +**Important:** The `name: 'database'` must match the `value_from` reference in the `databricks.yml` `config.env` block. + +### Step 2: databricks.yml config block (Environment Variables) + +Add the Lakebase environment variables to your app's `config.env` in `databricks.yml`: + +```yaml + config: + command: ["uv", "run", "start-app"] + env: + # ... other env vars ... 
+ + # Lakebase instance name (resolved from database resource) + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" + + # Embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" +``` + +**Important:** `LAKEBASE_INSTANCE_NAME` uses `value_from: "database"` to resolve from the database resource at deploy time. + +### Step 3: .env (Local Development) + +```bash +# Lakebase configuration for long-term memory +LAKEBASE_INSTANCE_NAME=<your-lakebase-instance-name> +EMBEDDING_ENDPOINT=databricks-gte-large-en +EMBEDDING_DIMS=1024 +``` + +--- + +## Integration Example + +Minimal example showing how to integrate memory into your streaming function: + +```python +from agent_server.utils_memory import memory_tools, get_user_id + +@stream() +async def streaming(request: ResponsesAgentRequest): + user_id = get_user_id(request) + + async with AsyncDatabricksStore( + instance_name=LAKEBASE_INSTANCE_NAME, + embedding_endpoint=EMBEDDING_ENDPOINT, + embedding_dims=EMBEDDING_DIMS, + ) as store: + await store.setup() # Creates tables if needed + + tools = await mcp_client.get_tools() + memory_tools() + config = {"configurable": {"user_id": user_id, "store": store}} + + agent = create_react_agent(model=model, tools=tools) + async for event in agent.astream(messages, config): + yield event +``` + +--- + +## Initialize Tables and Deploy + +### Initialize Lakebase Tables (First Time Only) + +Before deploying, initialize the tables locally: + +```bash +uv run python -c "$(cat <<'EOF' +import asyncio +from databricks_langchain import AsyncDatabricksStore + +async def setup(): + async with AsyncDatabricksStore( + instance_name="<your-lakebase-instance-name>", + embedding_endpoint="databricks-gte-large-en", + embedding_dims=1024, + ) as store: + await store.setup() + print("Tables created!") + +asyncio.run(setup()) +EOF +)" +``` + +### Deploy + +After initializing tables, deploy your agent. See **deploy** skill for full instructions. 
+ +--- + +## Short-Term Memory + +For conversation history within a session, use `AsyncCheckpointSaver`: + +```python +from databricks_langchain import AsyncCheckpointSaver + +async with AsyncCheckpointSaver(instance_name=LAKEBASE_INSTANCE_NAME) as checkpointer: + agent = create_react_agent( + model=model, + tools=tools, + checkpointer=checkpointer, + ) + + config = {"configurable": {"thread_id": thread_id}} + async for event in agent.astream(messages, config): + yield event +``` + +See the [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) template for a complete implementation. + +--- + +## Testing Memory + +### Test Locally + +```bash +# Start the server +uv run start-app + +# Save a memory +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "Remember that I am on the shipping team"}], + "custom_inputs": {"user_id": "alice@example.com"} + }' + +# Recall the memory +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What team am I on?"}], + "custom_inputs": {"user_id": "alice@example.com"} + }' + +# Delete a memory +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "Forget what team I am on"}], + "custom_inputs": {"user_id": "alice@example.com"} + }' +``` + +### Test Deployed App + +```bash +# Get OAuth token (PATs don't work for apps) +TOKEN=$(databricks auth token --host <workspace-url> | jq -r '.access_token') + +# Test memory save +curl -X POST https://<app-url>/invocations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "Remember I prefer detailed explanations"}], + "custom_inputs": {"user_id": "alice@example.com"} + }' +``` + +--- + +## First-Time Setup Checklist + +- [ ] Added 
`databricks-langchain[memory]` to `pyproject.toml` +- [ ] Run `uv sync` to install dependencies +- [ ] Created or identified Lakebase instance +- [ ] Added Lakebase env vars to `.env` (for local dev) +- [ ] Added `database` resource to `databricks.yml` +- [ ] Added `LAKEBASE_INSTANCE_NAME` to `databricks.yml` `config.env` +- [ ] **Initialized tables locally** by running `await store.setup()` +- [ ] Deployed with `databricks bundle deploy && databricks bundle run` + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| **"embedding_dims is required"** | Missing parameter | Add `embedding_dims=1024` to AsyncDatabricksStore | +| **"relation 'store' does not exist"** | Tables not created | Run `await store.setup()` locally first | +| **"Unable to resolve Lakebase instance 'None'"** | Missing env var | Check `LAKEBASE_INSTANCE_NAME` in databricks.yml `config.env` | +| **"permission denied for table store"** | Missing grants | Add `database` resource to databricks.yml | +| **"Memory not available - no user_id"** | Missing user_id | Pass `custom_inputs.user_id` in request | +| **Memory not persisting** | Different user_ids | Use consistent user_id across requests | +| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run agent_langgraph` after deploy | + +--- + +## Pre-Built Memory Templates + +For fully configured implementations without manual setup: + +| Template | Memory Type | Key Features | +|----------|-------------|--------------| +| [agent-langgraph-advanced](https://github.com/databricks/app-templates/tree/main/agent-langgraph-advanced) | Short-term + Long-term | AsyncCheckpointSaver, AsyncDatabricksStore, memory tools | + +--- + +## Next Steps + +- Configure Lakebase: see **lakebase-setup** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/skills/agent-langgraph-memory/agents/openai.yaml b/skills/agent-langgraph-memory/agents/openai.yaml new file 
mode 100644 index 0000000..36e08a8 --- /dev/null +++ b/skills/agent-langgraph-memory/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Agent Memory (LangGraph)" + short_description: "Add memory to a LangGraph agent" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $agent-langgraph-memory for adding memory and conversation persistence to a LangGraph agent." diff --git a/skills/agent-langgraph-memory/assets/databricks.png b/skills/agent-langgraph-memory/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/agent-langgraph-memory/assets/databricks.png differ diff --git a/skills/agent-langgraph-memory/assets/databricks.svg b/skills/agent-langgraph-memory/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/agent-langgraph-memory/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/agent-openai-memory/SKILL.md b/skills/agent-openai-memory/SKILL.md new file mode 100644 index 0000000..8f6f7b4 --- /dev/null +++ b/skills/agent-openai-memory/SKILL.md @@ -0,0 +1,178 @@ +--- +name: agent-openai-memory +description: "Add memory capabilities to your agent. Use when: (1) User asks about 'memory', 'state', 'remember', 'conversation history', (2) Want to persist conversations or user preferences, (3) Adding checkpointing or long-term storage." +metadata: + version: "0.0.1" +--- + +# Stateful Memory with OpenAI Agents SDK Sessions + +This template uses OpenAI Agents SDK [Sessions](https://openai.github.io/openai-agents-python/sessions/) with `AsyncDatabricksSession` to persist conversation history to a Databricks Lakebase instance. + +## How Sessions Work + +Sessions automatically manage conversation history for multi-turn interactions: + +1. **Before each run**: The session retrieves prior conversation history and prepends it to input +2. 
**During the run**: New items (user messages, responses, tool calls) are generated +3. **After each run**: All new items are automatically stored in the session + +This eliminates the need to manually manage conversation state between runs. + +## Key Concepts + +| Concept | Description | +|---------|-------------| +| **Session** | Stores conversation history for a specific `session_id` | +| **`session_id`** | Unique identifier linking requests to the same conversation | +| **`AsyncDatabricksSession`** | Session implementation backed by Databricks Lakebase | +| **`LAKEBASE_INSTANCE_NAME`** | Environment variable specifying the Lakebase instance | + +## How This Template Uses Sessions + +### Session Creation (`agent_server/agent.py`) + +```python +from databricks_openai.agents import AsyncDatabricksSession + +session = AsyncDatabricksSession( + session_id=get_session_id(request), + instance_name=LAKEBASE_INSTANCE_NAME, +) + +result = await Runner.run(agent, messages, session=session) +``` + +### Session ID Extraction (`agent_server/agent.py`) + +The `session_id` is extracted from `custom_inputs` or auto-generated: + +```python +def get_session_id(request: ResponsesAgentRequest) -> str: + if hasattr(request, "custom_inputs") and request.custom_inputs: + if "session_id" in request.custom_inputs: + return request.custom_inputs["session_id"] + return str(uuid7()) +``` + +### Lakebase Instance Resolution (`agent_server/utils.py`) + +The `LAKEBASE_INSTANCE_NAME` env var can be either an instance name or a hostname. The `resolve_lakebase_instance_name()` function handles both cases: + +```python +_LAKEBASE_INSTANCE_NAME_RAW = os.environ.get("LAKEBASE_INSTANCE_NAME") +LAKEBASE_INSTANCE_NAME = resolve_lakebase_instance_name(_LAKEBASE_INSTANCE_NAME_RAW) +``` + +--- + +## Prerequisites + +1. **Dependency**: `databricks-openai[memory]` must be in `pyproject.toml` (already included) + +2. **Lakebase instance**: You need a Databricks Lakebase instance. 
See the **lakebase-setup** skill for creating and configuring one. + +3. **Environment variable**: Set `LAKEBASE_INSTANCE_NAME` in your `.env` file: + ```bash + LAKEBASE_INSTANCE_NAME= + ``` + +--- + +## Configuration Files + +### databricks.yml (Lakebase Resource) + +Add the Lakebase database resource to your app: + +```yaml +resources: + apps: + agent_openai_advanced: + name: "your-app-name" + source_code_path: ./ + + resources: + # ... other resources (experiment, etc.) ... + + # Lakebase instance for session storage + - name: 'database' + database: + instance_name: '' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +### databricks.yml config block (Environment Variables) + +The `LAKEBASE_INSTANCE_NAME` env var is resolved from the database resource at deploy time. Add to your app's `config.env` in `databricks.yml`: + +```yaml + config: + env: + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" +``` + +### .env (Local Development) + +```bash +LAKEBASE_INSTANCE_NAME= +``` + +--- + +## Testing Sessions + +### Test Multi-Turn Conversation Locally + +```bash +# Start the server +uv run start-app + +# First message - starts a new session +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{"input": [{"role": "user", "content": "Hello, I live in SF!"}]}' + +# Note the session_id from custom_outputs in the response + +# Second message - continues the same session +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What city did I say I live in?"}], + "custom_inputs": {"session_id": ""} + }' +``` + +### Test Streaming + +```bash +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "Hello!"}], + "stream": true + }' +``` + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| 
**"LAKEBASE_INSTANCE_NAME environment variable is required"** | Missing env var | Set `LAKEBASE_INSTANCE_NAME` in `.env` | +| **SSL connection closed unexpectedly** | Network/instance issue | Verify Lakebase instance is running: `databricks lakebase instances get ` | +| **Agent doesn't remember previous messages** | Different session_id | Pass the same `session_id` via `custom_inputs` across requests | +| **"Unable to resolve hostname"** | Hostname doesn't match any instance | Verify the hostname or use the instance name directly | +| **Permission denied** | Missing Lakebase access | Add `database` resource to `databricks.yml` with `CAN_CONNECT_AND_CREATE` | + +--- + +## Next Steps + +- Configure Lakebase: see **lakebase-setup** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/skills/agent-openai-memory/agents/openai.yaml b/skills/agent-openai-memory/agents/openai.yaml new file mode 100644 index 0000000..7706284 --- /dev/null +++ b/skills/agent-openai-memory/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Agent Memory (OpenAI)" + short_description: "Add memory to an OpenAI agent" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $agent-openai-memory for adding memory and conversation persistence to an OpenAI Agents SDK agent." 
diff --git a/skills/agent-openai-memory/assets/databricks.png b/skills/agent-openai-memory/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/agent-openai-memory/assets/databricks.png differ diff --git a/skills/agent-openai-memory/assets/databricks.svg b/skills/agent-openai-memory/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/agent-openai-memory/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/create-tools/SKILL.md b/skills/create-tools/SKILL.md new file mode 100644 index 0000000..e3cc076 --- /dev/null +++ b/skills/create-tools/SKILL.md @@ -0,0 +1,28 @@ +--- +name: create-tools +description: "Create Databricks resources that agents connect to as tools. Use when: (1) User needs to create a Genie space, vector search index, UC function, or UC connection, (2) User says 'create tool', 'set up genie', 'create vector search', 'register MCP server', (3) Before add-tools when the resource doesn't exist yet, (4) User asks 'what do I need to create before adding this tool'." +metadata: + version: "0.0.1" +--- + +# Create Tool Resources + +> This skill covers creating the Databricks resources your agent connects to. +> After creating a resource, use the **add-tools** skill to wire it into your agent and grant permissions. + +## Which resource do you need? + +| I want my agent to... | Resource to create | Guide | +|---|---|---| +| Answer questions about structured data | Genie space | `examples/genie-space.md` | +| Search documents / RAG | Vector Search index | `examples/vector-search-index.md` | +| Call custom SQL/Python logic | UC function | `examples/uc-function.md` | +| Connect to an external MCP server | UC connection | `examples/uc-connection.md` | +| Add inline Python tools | Local function tools | `examples/local-python-tools.md` | + +## Workflow + +1. 
**Discover** existing resources: `uv run discover-tools` (see **discover-tools** skill) +2. **Create** the resource if it doesn't exist (this skill) +3. **Add** the MCP server to your agent code + grant permissions (see **add-tools** skill) +4. **Deploy** (see **deploy** skill) diff --git a/skills/create-tools/agents/openai.yaml b/skills/create-tools/agents/openai.yaml new file mode 100644 index 0000000..a798bd3 --- /dev/null +++ b/skills/create-tools/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Create Tools" + short_description: "Create Databricks resources to use as tools" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $create-tools for creating Databricks resources (Genie spaces, vector search, UC functions) for use as agent tools." diff --git a/skills/create-tools/assets/databricks.png b/skills/create-tools/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/create-tools/assets/databricks.png differ diff --git a/skills/create-tools/assets/databricks.svg b/skills/create-tools/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/create-tools/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/deploy/SKILL.md b/skills/deploy/SKILL.md new file mode 100644 index 0000000..4c31ae0 --- /dev/null +++ b/skills/deploy/SKILL.md @@ -0,0 +1,250 @@ +--- +name: deploy +description: "Deploy agent to Databricks Apps using DAB (Databricks Asset Bundles). Use when: (1) User says 'deploy', 'push to databricks', or 'bundle deploy', (2) 'App already exists' error occurs, (3) Need to bind/unbind existing apps, (4) Debugging deployed apps, (5) Querying deployed app endpoints." 
+metadata: + version: "0.0.1" +--- + +# Deploy to Databricks Apps + +## Profile Configuration + +**IMPORTANT:** Before running any `databricks` CLI command, read the `.env` file to get the `DATABRICKS_CONFIG_PROFILE` value. All commands must include the profile: + +```bash +databricks --profile +``` + +For example, if `.env` has `DATABRICKS_CONFIG_PROFILE=dev`, run `databricks bundle deploy --profile dev`. Without this, the CLI may target the wrong workspace. + +## App Naming Convention + +Unless the user specifies a different name, apps should use the prefix `agent-*`: +- `agent-data-analyst` +- `agent-customer-support` +- `agent-code-helper` + +Update the app name in `databricks.yml`: +```yaml +resources: + apps: + {{BUNDLE_NAME}}: + name: "agent-your-app-name" # Use agent-* prefix +``` + +## Deploy Commands + +**IMPORTANT:** Run the pre-flight check before deploying to catch errors early, then run commands to deploy and start your app: + +```bash +# 1. Pre-flight check (starts server locally, sends test request, verifies response) +uv run preflight + +# 2. Validate bundle configuration (catches config errors before deploy) +databricks bundle validate + +# 3. Deploy the bundle (creates/updates resources, uploads files) +databricks bundle deploy + +# 4. Run the app (starts/restarts with uploaded source code) - REQUIRED! +databricks bundle run {{BUNDLE_NAME}} +``` + +> **Note:** `bundle deploy` only uploads files and configures resources. `bundle run` is **required** to actually start/restart the app with the new code. If you only run `deploy`, the app will continue running old code! + +The resource key `{{BUNDLE_NAME}}` matches the app name in `databricks.yml` under `resources.apps`. + +## Handling "App Already Exists" Error + +If `databricks bundle deploy` fails with: +``` +Error: failed to create app +Failed to create app . An app with the same name already exists. 
+``` + +**Ask the user:** "Would you like to bind the existing app to this bundle, or delete it and create a new one?" + +### Option 1: Bind Existing App (Recommended) + +**Step 1:** Get the existing app's full configuration: +```bash +# Get app config including budget_policy_id and other server-side settings +databricks apps get --output json | jq '{name, budget_policy_id, description}' +``` + +**Step 2:** Update `databricks.yml` to match the existing app's configuration exactly: +```yaml +resources: + apps: + {{BUNDLE_NAME}}: + name: "existing-app-name" # Must match exactly + budget_policy_id: "xxx-xxx-xxx" # Copy from step 1 if present +``` + +> **Why this matters:** Existing apps may have server-side configuration (like `budget_policy_id`) that isn't in your bundle. If these don't match, Terraform will fail with "Provider produced inconsistent result after apply". Always sync the app's current config to `databricks.yml` before binding. + +**Step 3:** If deploying to a `mode: production` target, set `workspace.root_path`: +```yaml +targets: + prod: + mode: production + workspace: + root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target} +``` + +> **Why this matters:** Production mode requires an explicit root path to ensure only one copy of the bundle is deployed. Without this, the deploy will fail with a recommendation to set `workspace.root_path`. + +**Step 4:** Check if already bound, then bind if needed: +```bash +# Check if resource is already managed by this bundle +databricks bundle summary --output json | jq '.resources.apps' + +# If the app appears in the summary, skip binding and go to Step 5 +# If NOT in summary, bind the resource: +databricks bundle deployment bind {{BUNDLE_NAME}} --auto-approve +``` + +> **Note:** If bind fails with "Resource already managed by Terraform", the app is already bound to this bundle. Skip to Step 5 and deploy directly. 
+ +**Step 5:** Deploy: +```bash +databricks bundle deploy +databricks bundle run {{BUNDLE_NAME}} +``` + +### Option 2: Delete and Recreate + +```bash +databricks apps delete <app-name> +databricks bundle deploy +databricks bundle run {{BUNDLE_NAME}} +``` + +**Warning:** This permanently deletes the app's URL, OAuth credentials, and service principal. + +## Unbinding an App + +To remove the link between the bundle and the deployed app: + +```bash +databricks bundle deployment unbind {{BUNDLE_NAME}} +``` + +Use when: +- Switching to a different app +- Letting bundle create a new app +- Switching between deployed instances + +Note: Unbinding doesn't delete the deployed app. + +## Query Deployed App + +> **IMPORTANT:** Databricks Apps are **only** queryable via OAuth token. You **cannot** use a Personal Access Token (PAT) to query your agent. Attempting to use a PAT will result in a 302 redirect error. + +**Get OAuth token:** +```bash +databricks auth token | jq -r '.access_token' +``` + +**Send request:** +```bash +curl -X POST <app-url>/invocations \ + -H "Authorization: Bearer <oauth-token>" \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' +``` + +**If using memory** - include `user_id` to scope memories per user: +```bash +curl -X POST <app-url>/invocations \ + -H "Authorization: Bearer <oauth-token>" \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What do you remember about me?"}], + "custom_inputs": {"user_id": "user@example.com"} + }' +``` + +## On-Behalf-Of (OBO) User Authentication + +To authenticate as the requesting user instead of the app service principal: + +```python +from agent_server.utils import get_user_workspace_client + +# In your agent code +user_client = get_user_workspace_client() +# Use user_client for operations that should run as the user +``` + +This is useful when you want the agent to access resources with the user's permissions rather than the app's service principal permissions. 
+ +See: [OBO authentication documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#retrieve-user-authorization-credentials) + +## Debug Deployed Apps + +```bash +# View logs (follow mode) +databricks apps logs <app-name> --follow + +# Check app status +databricks apps get <app-name> --output json | jq '{app_status, compute_status}' + +# Get app URL +databricks apps get <app-name> --output json | jq -r '.url' +``` + +## Post-Deploy: Autoscaling Lakebase Resources + +If the agent uses **autoscaling Lakebase** (user mentions "autoscaling", "project", or "branch" in the context of Lakebase), the postgres resource is declared natively in `databricks.yml` — `databricks bundle deploy` creates the app with it. You only need to grant table permissions to the app's service principal after deploy: + +```bash +# Find the SP client ID +databricks apps get <app-name> --output json | jq -r '.service_principal_client_id' + +# Grant table permissions (see scripts/grant_lakebase_permissions.py) +``` + +**See `.claude/skills/add-tools/examples/lakebase-autoscaling.yaml` for the full resource snippet.** Requires CLI v0.295.0+ for native `postgres` resource support. + +## Important Notes + +- **App naming convention**: Unless the user specifies a different name, app names should be prefixed with `agent-` (e.g., `agent-my-assistant`, `agent-data-analyst`) +- **Name is immutable**: Changing the `name` field in `databricks.yml` forces app replacement (destroy + create) +- **Remote Terraform state**: Databricks stores state remotely, so the same app is detected across directories +- **Review the plan**: Look for `# forces replacement` in Terraform output before confirming + +## FAQ + +**Q: I see a 200 OK in the logs, but get an error in the actual stream. What's going on?** + +This is expected behavior. The initial 200 OK confirms stream setup was successful. Errors that occur during streaming don't affect the initial HTTP status code. Check the stream content for the actual error message. + +**Q: When querying my agent, I get a 302 redirect error. 
What's wrong?** + +You're likely using a Personal Access Token (PAT). Databricks Apps only support OAuth tokens. Generate one with: +```bash +databricks auth token +``` + +**Q: How do I add dependencies to my agent?** + +Use `uv add`: +```bash +uv add +# Example: uv add "mlflow-skinny[databricks]" +``` + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| Validation errors | Run `databricks bundle validate` to see detailed errors before deploying | +| Permission errors at runtime | Grant resources in `databricks.yml` (see **add-tools** skill) | +| Lakebase access errors | See **lakebase-setup** skill for permissions (if using memory) | +| App not starting | Check `databricks apps logs ` | +| Auth token expired | Run `databricks auth token` again | +| 302 redirect error | Use OAuth token, not PAT | +| "Provider produced inconsistent result" | Sync app config to `databricks.yml` | +| "should set workspace.root_path" | Add `root_path` to production target | +| App running old code after deploy | Run `databricks bundle run {{BUNDLE_NAME}}` after deploy | +| Env var is None in deployed app | Check `value_from` in databricks.yml `config.env` matches resource `name` | diff --git a/skills/deploy/agents/openai.yaml b/skills/deploy/agents/openai.yaml new file mode 100644 index 0000000..63f3c2f --- /dev/null +++ b/skills/deploy/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Deploy Agent App" + short_description: "Deploy agent app via DAB" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $deploy for deploying an agent to Databricks Apps via Databricks Asset Bundles." 
diff --git a/skills/deploy/assets/databricks.png b/skills/deploy/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/deploy/assets/databricks.png differ diff --git a/skills/deploy/assets/databricks.svg b/skills/deploy/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/deploy/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/discover-tools/SKILL.md b/skills/discover-tools/SKILL.md new file mode 100644 index 0000000..d7f7d82 --- /dev/null +++ b/skills/discover-tools/SKILL.md @@ -0,0 +1,51 @@ +--- +name: discover-tools +description: "Discover available tools and resources in Databricks workspace. Use when: (1) User asks 'what tools are available', (2) Before writing agent code, (3) Looking for MCP servers, Genie spaces, UC functions, or vector search indexes, (4) User says 'discover', 'find resources', or 'what can I connect to'." +metadata: + version: "0.0.1" +--- + +# Discover Available Tools + +**Run tool discovery BEFORE writing agent code** to understand what resources are available in the workspace. 
+ +## Run Discovery + +```bash +uv run discover-tools +``` + +**Options:** +```bash +# Limit to specific catalog/schema +uv run discover-tools --catalog my_catalog --schema my_schema + +# Output as JSON +uv run discover-tools --format json --output tools.json + +# Save markdown report +uv run discover-tools --output tools.md + +# Use specific Databricks profile +uv run discover-tools --profile DEFAULT +``` + +## What Gets Discovered + +| Resource Type | Description | MCP URL Pattern | +|--------------|-------------|-----------------| +| **UC Functions** | SQL UDFs as agent tools | `{host}/api/2.0/mcp/functions/{catalog}/{schema}` | +| **UC Tables** | Structured data for querying | (via UC functions) | +| **Vector Search Indexes** | RAG applications | `{host}/api/2.0/mcp/vector-search/{catalog}/{schema}` | +| **Genie Spaces** | Natural language data interface | `{host}/api/2.0/mcp/genie/{space_id}` | +| **Custom MCP Servers** | Apps starting with `mcp-*` | `{app_url}/mcp` | +| **External MCP Servers** | Via UC connections | `{host}/api/2.0/mcp/external/{connection_name}` | + +## Next Steps + +After discovering tools: +1. **Add MCP servers to your agent** - See **modify-agent** skill for SDK-specific code examples +2. **Grant permissions** in `databricks.yml` - See **add-tools** skill for YAML snippets +3. **Test locally** with `uv run start-app` - See **run-locally** skill + +Need a resource that doesn't exist yet? See the **create-tools** skill. 
diff --git a/skills/discover-tools/agents/openai.yaml b/skills/discover-tools/agents/openai.yaml new file mode 100644 index 0000000..7450d8e --- /dev/null +++ b/skills/discover-tools/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Discover Tools" + short_description: "Discover available workspace tools" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $discover-tools for discovering MCP servers, Genie spaces, UC functions, and vector search indexes in a Databricks workspace." diff --git a/skills/discover-tools/assets/databricks.png b/skills/discover-tools/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/discover-tools/assets/databricks.png differ diff --git a/skills/discover-tools/assets/databricks.svg b/skills/discover-tools/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/discover-tools/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/lakebase-setup/SKILL.md b/skills/lakebase-setup/SKILL.md new file mode 100644 index 0000000..83fa097 --- /dev/null +++ b/skills/lakebase-setup/SKILL.md @@ -0,0 +1,469 @@ +--- +name: lakebase-setup +description: "Configure Lakebase for agent memory storage. Use when: (1) Adding memory capabilities to the agent, (2) 'Failed to connect to Lakebase' errors, (3) Permission errors on checkpoint/store tables, (4) User says 'lakebase', 'memory setup', or 'add memory'." +metadata: + version: "0.0.1" +--- + +# Lakebase Setup for Agent Persistence + +> **Profile reminder:** All `databricks` CLI commands must include the profile from `.env`: `databricks --profile ` or `DATABRICKS_CONFIG_PROFILE= databricks ` + +> **Two types of Lakebase:** Databricks supports **provisioned** instances (with instance name) and **autoscaling** instances (project/branch model). This skill covers both. 
Make sure you know which type of Lakebase instance the user has; if it is unclear, ask the user before proceeding. + +## Use Cases + +Lakebase is used for three distinct purposes across the agent templates: + +| Use case | Templates | Description | +|----------|-----------|-------------| +| **Chat UI conversation history** | All templates | The built-in chat UI (`e2e-chatbot-app-next`) can persist conversations across page refreshes and browser sessions. This is purely UI-side persistence — the agent itself is stateless. | +| **Agent short-term memory** | `agent-langgraph-advanced`, `agent-openai-advanced` | Conversation threads within a session via `AsyncCheckpointSaver` (LangGraph) or `AsyncDatabricksSession` (OpenAI SDK). The agent remembers what was said earlier in the same conversation. | +| **Agent long-term memory** | `agent-langgraph-advanced` | User facts across sessions via `AsyncDatabricksStore`. The agent remembers things about a user from previous conversations. | + +> **Note:** When the quickstart prompts for Lakebase on a non-memory template, it's for **chat UI history** only — not for the agent. Memory templates always require Lakebase. 
+ +## Overview + +Lakebase provides persistent PostgreSQL storage for agents: +- **Short-term memory** (LangGraph): Conversation history within a thread (`AsyncCheckpointSaver`) +- **Long-term memory** (LangGraph): User facts across sessions (`AsyncDatabricksStore`) +- **Short-term memory** (OpenAI SDK): Conversation history via `AsyncDatabricksSession` +- **Long-running agent persistence** (OpenAI SDK): Background task state via custom SQLAlchemy tables (`agent_server` schema) + +> **Note:** For pre-configured memory templates, see: +> - `agent-langgraph-advanced` - Short-term memory, long-term memory, and long-running background tasks (LangGraph) +> - `agent-openai-advanced` - Short-term memory and long-running background tasks (OpenAI SDK) + +## Complete Setup Workflow + +``` +┌───────────────────────────────────────────────────────────────────────────┐ +│ 1. Add dependency → 2. Get instance → 3. Configure DAB │ +│ 4. Configure .env → 5. Deploy → 6. Grant SP permissions → 7. Run │ +└───────────────────────────────────────────────────────────────────────────┘ +``` + +> **Shortcut:** If using a pre-configured memory template, `uv run quickstart` with Lakebase flags handles steps 2-4 automatically. You still need to do steps 5-7 manually. + +--- + +## Step 1: Add Memory Dependency + +Add the memory extra to your `pyproject.toml`: + +```toml +dependencies = [ + "databricks-langchain[memory]", + # ... other dependencies +] +``` + +Then sync dependencies: +```bash +uv sync +``` + +--- + +## Step 2: Create or Get Lakebase Instance + +### Option A: Provisioned Instance + +1. Go to your Databricks workspace +2. Navigate to **Compute** → **Lakebase** +3. Click **Create Instance** (or use an existing one) +4. Note the **instance name** + +### Option B: Autoscaling Instance + +Autoscaling uses a **project/branch** model. 
You need three values: +- **Project name** (e.g., `my-project`) +- **Branch name** (e.g., `my-branch`) +- **Database ID** (e.g., `db-xxxx-xxxxxxxxxx`) + +Find these via the postgres API: + +```bash +# List projects +databricks api get /api/2.0/postgres/projects --profile + +# List branches for a project +databricks api get /api/2.0/postgres/projects//branches --profile + +# List databases for a branch +databricks api get /api/2.0/postgres/projects//branches//databases --profile +``` + +**Important:** The database ID is the internal ID (e.g., `db-xxxx-xxxxxxxxxx`), NOT `databricks_postgres`. + +--- + +## Step 3: Configure databricks.yml (Lakebase Resource) + +> **Note:** If you ran `uv run quickstart` with Lakebase flags (`--lakebase-provisioned-name` or `--lakebase-autoscaling-project`/`--lakebase-autoscaling-branch`), the quickstart already configured `databricks.yml` for you — including fetching the database ID for autoscaling. Manual configuration is only needed if you didn't use quickstart or need to change values. + +### Option A: Provisioned + +Add the `database` resource to your app in `databricks.yml`: + +```yaml +resources: + apps: + your_app: + name: "your-app-name" + source_code_path: ./ + resources: + # ... other resources (experiment, UC functions, etc.) ... + + # Lakebase instance for long-term memory + - name: 'database' + database: + instance_name: '' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' +``` + +**Important:** +- The `instance_name: ''` must match the actual Lakebase instance name +- Using the `database` resource type automatically grants the app's service principal access to Lakebase +See `.claude/skills/add-tools/examples/lakebase.yaml` for the YAML snippet. + +### Option B: Autoscaling + +Add the `postgres` resource to your app in `databricks.yml`: + +```yaml +resources: + apps: + your_app: + name: "your-app-name" + source_code_path: ./ + resources: + # ... other resources (experiment, UC functions, etc.) 
... + + # Autoscaling Lakebase instance for long-term memory + - name: 'postgres' + postgres: + branch: "projects/<project-name>/branches/<branch-name>" + database: "projects/<project-name>/branches/<branch-name>/databases/<database-id>" + permission: 'CAN_CONNECT_AND_CREATE' +``` + +**Important:** The `branch` and `database` fields must be full resource paths, not bare names. + +See `.claude/skills/add-tools/examples/lakebase-autoscaling.yaml` for the YAML snippet. + +### Add Environment Variables to databricks.yml config block + +**Provisioned:** +```yaml + config: + env: + # Lakebase instance name - resolved from database resource at deploy time + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" + # Static values for embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" +``` + +**Autoscaling:** +```yaml + config: + env: + # Autoscaling Lakebase config + - name: LAKEBASE_AUTOSCALING_PROJECT + value: "<project-name>" + - name: LAKEBASE_AUTOSCALING_BRANCH + value: "<branch-name>" + # Static values for embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" +``` + +--- + +## Step 4: Configure .env (Local Development) + +For local development, add to `.env`: + +**Provisioned:** +```bash +LAKEBASE_INSTANCE_NAME=<your-lakebase-instance-name> +EMBEDDING_ENDPOINT=databricks-gte-large-en +EMBEDDING_DIMS=1024 +``` + +**Autoscaling:** +```bash +LAKEBASE_AUTOSCALING_PROJECT=<project-name> +LAKEBASE_AUTOSCALING_BRANCH=<branch-name> +EMBEDDING_ENDPOINT=databricks-gte-large-en +EMBEDDING_DIMS=1024 +``` + +**Important:** `EMBEDDING_DIMS` (`embedding_dims`) must match the output dimension of the embedding endpoint: + +| Endpoint | Dimensions | +|----------|------------| +| `databricks-gte-large-en` | 1024 | +| `databricks-bge-large-en` | 1024 | + +> **Note:** `.env` is only for local development. When deployed, the app gets values from `databricks.yml` config env. 
+ +--- + +## Step 5: Deploy + +Deploy the app so the service principal and resources are created: + +```bash +DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle deploy +``` + +--- + +## Step 6: Grant SP Permissions (CRITICAL) + +> **WARNING:** You MUST complete this step before running the app. Without it, the app will fail with database migration errors like `CREATE TABLE IF NOT EXISTS "drizzle"."__drizzle_migrations"` — permission denied. + +After deploying, the app's service principal needs Postgres roles to access Lakebase tables. The Databricks Asset Bundle (DAB) resource grants basic connectivity, but you must also grant Postgres-level schema and table permissions. + +**Step 1:** Get the app's service principal client ID: +```bash +DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq -r '.service_principal_client_id' +``` + +**Step 2:** Grant permissions using the grant script: + +```bash +# Provisioned: +DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \ + --memory-type <langgraph|openai> --instance-name <instance-name> + +# Autoscaling (endpoint — reads LAKEBASE_AUTOSCALING_ENDPOINT from .env by default): +DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \ + --memory-type <langgraph|openai> --autoscaling-endpoint <endpoint> + +# Autoscaling (project + branch): +DATABRICKS_CONFIG_PROFILE=<profile> uv run python scripts/grant_lakebase_permissions.py <sp-client-id> \ + --memory-type <langgraph|openai> --project <project-name> --branch <branch-name> +``` + +**Memory type by template:** + +| Template | `--memory-type` value | +|----------|-----------------------| +| `agent-langgraph-advanced` | `langgraph` | +| `agent-openai-advanced` | `openai` | + +The script handles fresh branches gracefully (warns but doesn't fail if tables don't exist yet — they'll be created on first app startup). + +--- + +## Step 7: Run Your App + +```bash +DATABRICKS_CONFIG_PROFILE=<profile> databricks bundle run {{BUNDLE_NAME}} +``` + +> **Note:** `bundle deploy` only uploads files and configures resources. 
`bundle run` is required to actually start the app with the new code. + +--- + +## Complete Examples: databricks.yml with Lakebase + +### Provisioned Lakebase + +```yaml +bundle: + name: agent_langgraph + +resources: + apps: + agent_langgraph: + name: "my-agent-app" + description: "Agent with long-term memory" + source_code_path: ./ + config: + command: ["uv", "run", "start-app"] + env: + - name: MLFLOW_TRACKING_URI + value: "databricks" + - name: MLFLOW_REGISTRY_URI + value: "databricks-uc" + - name: API_PROXY + value: "http://localhost:8000/invocations" + - name: CHAT_APP_PORT + value: "3000" + - name: CHAT_PROXY_TIMEOUT_SECONDS + value: "300" + - name: MLFLOW_EXPERIMENT_ID + value_from: "experiment" + # Lakebase instance name (resolved from database resource) + - name: LAKEBASE_INSTANCE_NAME + value_from: "database" + # Static values for embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" + + resources: + - name: 'experiment' + experiment: + experiment_id: "<your-experiment-id>" + permission: 'CAN_MANAGE' + - name: 'database' + database: + instance_name: '<your-lakebase-instance-name>' + database_name: 'databricks_postgres' + permission: 'CAN_CONNECT_AND_CREATE' + +targets: + dev: + mode: development + default: true +``` + +### Autoscaling Lakebase + +```yaml +bundle: + name: agent_langgraph + +resources: + apps: + agent_langgraph: + name: "my-agent-app" + description: "Agent with long-term memory" + source_code_path: ./ + config: + command: ["uv", "run", "start-app"] + env: + - name: MLFLOW_TRACKING_URI + value: "databricks" + - name: MLFLOW_REGISTRY_URI + value: "databricks-uc" + - name: API_PROXY + value: "http://localhost:8000/invocations" + - name: CHAT_APP_PORT + value: "3000" + - name: CHAT_PROXY_TIMEOUT_SECONDS + value: "300" + - name: MLFLOW_EXPERIMENT_ID + value_from: "experiment" + # Autoscaling Lakebase config + - name: LAKEBASE_AUTOSCALING_PROJECT + value: "<project-name>" + - name: LAKEBASE_AUTOSCALING_BRANCH + value: "<branch-name>" + # Static 
values for embedding configuration + - name: EMBEDDING_ENDPOINT + value: "databricks-gte-large-en" + - name: EMBEDDING_DIMS + value: "1024" + + resources: + - name: 'experiment' + experiment: + experiment_id: "<your-experiment-id>" + permission: 'CAN_MANAGE' + - name: 'postgres' + postgres: + branch: "projects/<project-name>/branches/<branch-name>" + database: "projects/<project-name>/branches/<branch-name>/databases/<database-id>" + permission: 'CAN_CONNECT_AND_CREATE' + +targets: + dev: + mode: development + default: true +``` + +--- + +## Troubleshooting + +| Issue | Cause | Solution | +|-------|-------|----------| +| **"embedding_dims is required when embedding_endpoint is specified"** | Missing `embedding_dims` parameter | Add `embedding_dims=1024` to AsyncDatabricksStore | +| **"relation 'store' does not exist"** | Tables not initialized | The app creates tables on first use; ensure SP has CREATE permission | +| **"Unable to resolve Lakebase instance 'None'"** | Missing env var in deployed app | Add `LAKEBASE_INSTANCE_NAME` to databricks.yml `config.env` | +| **"permission denied for table store"** | Missing grants | Run `uv run python scripts/grant_lakebase_permissions.py <sp-client-id>` (with `--memory-type` and the instance or project/branch flags) to grant permissions | +| **"Failed to connect to Lakebase"** | Wrong instance name or project/branch | Verify values in databricks.yml and .env | +| **Connection pool errors on exit** | Python cleanup race | Ignore `PythonFinalizationError` - it's harmless | +| **App not updated after deploy** | Forgot to run bundle | Run `databricks bundle run <app-resource-key>` after deploy | +| **value_from not resolving** | Resource name mismatch | Ensure `value_from` value matches `name` in databricks.yml resources | +| **"Invalid postgres resource parameters"** | Missing `database` field in postgres resource | Add full `database` path: `projects/<project-name>/branches/<branch-name>/databases/<database-id>` | +| **`CREATE TABLE IF NOT EXISTS "drizzle"."__drizzle_migrations"` fails** | Grant step was skipped — SP lacks Postgres permissions | Run `grant_lakebase_permissions.py` with `--memory-type`, then restart the app | + +--- + +## 
LakebaseClient API (for reference) + +```python +from databricks_ai_bridge.lakebase import LakebaseClient, SchemaPrivilege, TablePrivilege + +# Provisioned: +client = LakebaseClient(instance_name="...") +# Autoscaling: +client = LakebaseClient(project="...", branch="...") + +# Create role (must do first) +client.create_role(identity_name, "SERVICE_PRINCIPAL") + +# Grant schema (note: schemas is a list, grantee not role) +client.grant_schema( + grantee="...", + schemas=["public"], + privileges=[SchemaPrivilege.USAGE, SchemaPrivilege.CREATE], +) + +# Grant tables (note: tables includes schema prefix) +client.grant_table( + grantee="...", + tables=["public.store"], + privileges=[TablePrivilege.SELECT, TablePrivilege.INSERT, ...], +) + +# Execute raw SQL +client.execute("SELECT * FROM pg_tables WHERE schemaname = 'public'") +``` + +### Service Principal Identifiers + +When granting permissions manually, note that Databricks apps have multiple identifiers: + +| Field | Format | Example | +|-------|--------|---------| +| `service_principal_id` | Numeric ID | `1234567890123456` | +| `service_principal_client_id` | UUID | `a1b2c3d4-e5f6-7890-abcd-ef1234567890` | +| `service_principal_name` | String name | `my-app-service-principal` | + +**Get all identifiers:** +```bash +DATABRICKS_CONFIG_PROFILE=<profile> databricks apps get <app-name> --output json | jq '{ + id: .service_principal_id, + client_id: .service_principal_client_id, + name: .service_principal_name +}' +``` + +**Which to use:** +- `LakebaseClient.create_role()` - Use `service_principal_client_id` (UUID) or `service_principal_name` +- Raw SQL grants - Use `service_principal_client_id` (UUID) + +--- + +## Next Steps + +- Add memory to agent code: see the **agent-langgraph-memory** or **agent-openai-memory** skill +- Test locally: see **run-locally** skill +- Deploy: see **deploy** skill diff --git a/skills/lakebase-setup/agents/openai.yaml b/skills/lakebase-setup/agents/openai.yaml new file mode 100644 index 0000000..e38c752 --- /dev/null +++ 
b/skills/lakebase-setup/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "Lakebase Setup (Agent Memory)" + short_description: "Configure Lakebase for agent memory" + icon_small: "./assets/databricks.svg" + icon_large: "./assets/databricks.png" + brand_color: "#FF3621" + default_prompt: "Use $lakebase-setup for configuring Lakebase as storage for agent memory." diff --git a/skills/lakebase-setup/assets/databricks.png b/skills/lakebase-setup/assets/databricks.png new file mode 100644 index 0000000..263fe98 Binary files /dev/null and b/skills/lakebase-setup/assets/databricks.png differ diff --git a/skills/lakebase-setup/assets/databricks.svg b/skills/lakebase-setup/assets/databricks.svg new file mode 100644 index 0000000..9d19110 --- /dev/null +++ b/skills/lakebase-setup/assets/databricks.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/skills/load-testing/SKILL.md b/skills/load-testing/SKILL.md new file mode 100644 index 0000000..f25d7de --- /dev/null +++ b/skills/load-testing/SKILL.md @@ -0,0 +1,321 @@ +--- +name: load-testing +description: "Load test a Databricks App to find its maximum QPS. Use when: (1) User says 'load test', 'benchmark', 'QPS', 'throughput', or 'performance test', (2) User wants to find how many queries per second their app can handle, (3) User wants to set up load testing scripts for their agent, (4) User wants to view load test results/dashboard." +metadata: + version: "0.0.1" +--- + +# Load Testing Your Databricks App + +**Goal:** Find the maximum QPS (queries per second) your Databricks App can support. + +## Before You Start — Gather Parameters + +Before beginning, use the `AskUserQuestion` tool to collect the following from the user: + +1. **Do they already have deployed apps to test, or do they need to set up new apps?** +2. **Do they want to mock LLM calls?** Mocking isolates infrastructure throughput from LLM latency — useful for capacity planning. Testing without mocks measures end-to-end performance. 
+3. **What compute sizes do they want to test?** (Medium, Large, or both) +4. **How many worker configurations do they want to test?** (e.g., 2, 4, 6, 8 workers) +5. **Do they have machine-to-machine (M2M) OAuth credentials (service principal client_id/client_secret)?** — Recommended for tests longer than ~30 minutes. If not, guide them to create one. +6. **What is their `DATABRICKS_HOST`?** (workspace URL) + +--- + +## Step 1: Set Up Load Testing Scripts + +Create a `load-test-scripts/` directory in the project with the following files. These scripts are framework-agnostic and work with any Databricks App. + +### Directory Structure + +``` +<project-root>/ + agent_server/ # Existing agent code + load-test-scripts/ # Load testing scripts (create this) + run_load_test.py # Main CLI — orchestrates Locust tests + locustfile.py # Locust test definition (SSE streaming, TTFT tracking) + dashboard_template.py # Generates interactive HTML dashboard from results + .env.example # Template for env vars + load-test-runs/ # Test results (auto-created per run) + <run-name>/ + dashboard.html # Interactive dashboard + test_config.json # Test parameters +