From 38c68e6b513ba1f45f14e6419cfbe07faeededf6 Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 16:08:05 +0200 Subject: [PATCH 1/9] Improve skills based on CAO pilot report findings Add error recovery patterns, troubleshooting, and coverage gaps identified by the April 2026 DevHub agent benchmark across 25 tasks. Key additions: - Token passthrough workaround and deployment recovery chain - Off-platform TypeScript/Node.js patterns with REST API fallback - pgvector, streaming AI chat, multi-space Genie patterns - Lakehouse Sync UI-only note and REPLICA IDENTITY prerequisite - Jobs and Pipelines troubleshooting tables - CLI STOP directive carve-out for off-platform tasks Co-authored-by: Isaac --- manifest.json | 30 +++++++---- scripts/skills.py | 4 ++ skills/databricks-apps/SKILL.md | 8 +++ .../references/appkit/genie.md | 49 ++++++++++++++++++ .../references/appkit/model-serving.md | 16 ++++++ .../references/platform-guide.md | 36 +++++++++++++ skills/databricks-core/SKILL.md | 14 ++++++ skills/databricks-jobs/SKILL.md | 11 ++++ skills/databricks-lakebase/SKILL.md | 42 ++++++++++++++++ .../references/connectivity.md | 50 +++++++++++++++++++ .../references/synced-tables.md | 19 +++++-- skills/databricks-pipelines/SKILL.md | 11 ++++ 12 files changed, 278 insertions(+), 12 deletions(-) diff --git a/manifest.json b/manifest.json index 54ec72f..421667f 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,12 @@ { "version": "2", - "updated_at": "2026-04-30T11:02:41Z", + "updated_at": "2026-05-07T13:51:35Z", "skills": { "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps development and deployment (evaluates analytics vs synced tables data access)", "experimental": false, - "updated_at": "2026-04-30T11:00:26Z", + "updated_at": "2026-05-07T13:50:04Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -33,7 +33,7 @@ "version": "0.1.0", "description": "Core Databricks skill for CLI, auth, and data exploration", "experimental": false, - 
"updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-07T13:51:29Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -48,7 +48,7 @@ "version": "0.0.0", "description": "Declarative Automation Bundles (DABs) for deploying and managing Databricks resources", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-05T15:31:42Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -66,7 +66,7 @@ "version": "0.1.0", "description": "Databricks Jobs orchestration and scheduling", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-07T13:50:17Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -78,7 +78,7 @@ "version": "0.1.0", "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API", "experimental": false, - "updated_at": "2026-04-30T11:02:37Z", + "updated_at": "2026-05-07T13:49:37Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -93,7 +93,7 @@ "version": "0.1.0", "description": "Databricks Model Serving endpoint management", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-05T15:31:42Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -105,7 +105,7 @@ "version": "0.1.0", "description": "Databricks Pipelines (DLT) for ETL and streaming", "experimental": false, - "updated_at": "2026-04-23T13:47:44Z", + "updated_at": "2026-05-07T13:50:27Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -152,7 +152,7 @@ "version": "0.1.0", "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", "experimental": false, - "updated_at": "2026-04-24T15:10:23Z", + "updated_at": "2026-05-05T15:31:42Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -164,6 +164,18 @@ "references/networking-and-security.md", "references/streaming-migration.md" ] + }, + "databricks-unity-catalog": { + "version": "0.1.0", + "description": "Unity Catalog setup: storage 
credentials, external locations, catalogs, schemas, and grants", + "experimental": true, + "updated_at": "2026-05-07T13:51:07Z", + "files": [ + "SKILL.md", + "agents/openai.yaml", + "assets/databricks.png", + "assets/databricks.svg" + ] } } } diff --git a/scripts/skills.py b/scripts/skills.py index cdfdcf7..9d10468 100644 --- a/scripts/skills.py +++ b/scripts/skills.py @@ -48,6 +48,10 @@ "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", "experimental": False, }, + "databricks-unity-catalog": { + "description": "Unity Catalog setup: storage credentials, external locations, catalogs, schemas, and grants", + "experimental": True, + }, } diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index 515fa9c..712cd17 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -179,6 +179,14 @@ databricks apps init --features analytics my-app-name databricks apps init --name my-app-name --features analytics --set "..." --profile ``` +### Known Issue: Go Template Syntax in Generated Files + +`databricks apps init` may generate Go template placeholders like `{{.AppName}}` in `.ts`/`.tsx` files instead of the actual app name. After scaffolding, search generated files for `{{` and replace with the actual app name: + +```bash +grep -r '{{' --include='*.ts' --include='*.tsx' . +``` + ### Directory Naming `databricks apps init` creates directories in kebab-case matching the app name. diff --git a/skills/databricks-apps/references/appkit/genie.md b/skills/databricks-apps/references/appkit/genie.md index 2d00d51..481fa66 100644 --- a/skills/databricks-apps/references/appkit/genie.md +++ b/skills/databricks-apps/references/appkit/genie.md @@ -145,6 +145,55 @@ function GeniePage() { Update smoke tests if headings or routes changed, then `databricks apps validate`. 
+## Multiple Genie Spaces + +To build an app that lets users switch between multiple Genie spaces (e.g., different datasets or domains): + +**`databricks.yml`** — declare multiple Genie resources with distinct aliases: + +```yaml +variables: + genie_space_sales_id: + description: Sales Genie Space ID + genie_space_support_id: + description: Support Genie Space ID + +resources: + apps: + app: + resources: + - name: genie-sales + genie_space: + space_id: ${var.genie_space_sales_id} + permission: CAN_RUN + - name: genie-support + genie_space: + space_id: ${var.genie_space_support_id} + permission: CAN_RUN +``` + +**`app.yaml`** — inject each space ID as a separate env var: + +```yaml +env: + - name: GENIE_SPACE_SALES + valueFrom: genie-sales + - name: GENIE_SPACE_SUPPORT + valueFrom: genie-support +``` + +**`server/server.ts`** — register the genie plugin once; it reads space IDs from env vars. Each alias becomes a separate `/api/genie/:alias/messages` endpoint. The client-side space selector routes messages to the correct alias. + +**Frontend** — build a selector that switches between spaces: + +```tsx +const spaces = [ + { alias: "sales", label: "Sales Analytics" }, + { alias: "support", label: "Support Metrics" }, +]; +// Route to /api/genie/{alias}/messages based on user selection +``` + ## Frontend **For full component API**: run `npx @databricks/appkit docs "GenieChat"`. diff --git a/skills/databricks-apps/references/appkit/model-serving.md b/skills/databricks-apps/references/appkit/model-serving.md index 0a35761..861ee4b 100644 --- a/skills/databricks-apps/references/appkit/model-serving.md +++ b/skills/databricks-apps/references/appkit/model-serving.md @@ -94,6 +94,22 @@ const result = await trpc.queryModel.query({ prompt: userInput }); const answer = result.choices?.[0]?.message?.content; ``` +## Streaming Chat Pattern + +For real-time token streaming in a chat UI, the serving endpoint must be called with streaming enabled. 
The Databricks Apps reverse proxy enforces a **120-second timeout** on HTTP requests, so streaming responses must complete within that window. + +- For streaming API details, check `npx @databricks/appkit docs` for the latest streaming support in AppKit +- If AppKit doesn't provide a built-in streaming pattern, use the OpenAI-compatible streaming API via `fetch` with `stream: true` in the request body and process the SSE response +- For interactions exceeding 120 seconds, use **WebSockets** instead of SSE — see [Platform Guide](../platform-guide.md) + +## AI Gateway & Embeddings + +AI Gateway foundation model endpoints (available in the `system.ai` catalog) are called the same way as custom serving endpoints — use the tRPC pattern above with the endpoint name. + +**Embeddings vs chat:** Embedding endpoints use a different request shape — `input` field instead of `messages`. Use `databricks serving-endpoints get-open-api ` from the `databricks-model-serving` skill to discover the expected input/output schema for any endpoint. + +**Storing embeddings:** For similarity search, store embeddings in Lakebase with pgvector — see the **`databricks-lakebase`** skill's pgvector section for `VECTOR` column types, indexes, and query patterns. + ## Troubleshooting | Error | Cause | Solution | diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index bb0e53d..66e5a4e 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -101,6 +101,8 @@ env: ⚠️ Databricks blocks access outside approved scopes even if the user has permission. +**When to use OBO vs SP-only:** Most apps work fine with SP-only authentication. Only add `user_api_scopes` when the app needs to act as the logged-in user (e.g., user-scoped file access, per-user Genie queries). If unsure, start without it — you can add OBO scopes later if needed. 
Adding `user_api_scopes` requires the workspace to have user token passthrough enabled; if it's not enabled, `bundle deploy` will fail. + ## Deployment Workflow ⚠️ **USER CONSENT REQUIRED** — always confirm with the user before deploying. @@ -117,6 +119,37 @@ databricks bundle run -t --profile ❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. Use `databricks apps deploy` or add `bundle run` after `bundle deploy`. +### Verify Deployment + +After deploying, confirm the app is running: + +```bash +databricks apps get --profile +``` + +Check these fields in the response: +- `app_status`: should be `RUNNING` (may show `STARTING` immediately after deploy — poll until `RUNNING`) +- `compute_status`: should be `ACTIVE` +- `active_deployment.status.state`: should be `SUCCEEDED` + +For real-time log monitoring: `databricks apps logs --follow --profile ` + +### Deployment Recovery + +If `databricks bundle deploy` fails on terraform or infrastructure errors, `databricks apps deploy` is a direct fallback — it uploads source code and applies `app.yaml` without the bundle/terraform layer: + +```bash +# Fallback when bundle deploy fails +databricks apps deploy --profile +``` + +If deployment fails with `File is larger than 10485760 bytes` pointing to files inside `.databricks/bundle/` (terraform binary cached by the bundle), delete the `.databricks/` directory and retry: + +```bash +rm -rf .databricks/ +databricks apps deploy --profile +``` + ### ⚠️ Destructive Updates Warning `databricks apps update` (and `bundle run`) performs a **full replacement**, not a merge: @@ -170,3 +203,6 @@ For long-running agent interactions, use **WebSockets** instead of SSE. 
| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | | `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not bundle variables | | 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations | +| `user token passthrough not enabled` | `user_api_scopes` in `databricks.yml` requires OBO auth, which is not enabled in the workspace | Remove the `user_api_scopes` block if the app doesn't need user-specific data — app runs under SP permissions instead | +| `File is larger than 10485760 bytes` in `.databricks/` | Terraform binary cached by bundle exceeds workspace file upload limit | Delete `.databricks/` directory and use `databricks apps deploy` instead of `bundle deploy` | +| `databricks apps logs` auth error | PAT authentication is incompatible with `databricks apps logs` | Use OAuth authentication: run `databricks auth login` instead of PAT-based profile | diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index 4d1abc9..39aefaf 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -18,6 +18,7 @@ For specific products, use dedicated skills: - **databricks-apps** - Full-stack TypeScript app development and deployment - **databricks-lakebase** - Lakebase Postgres Autoscaling project management - **databricks-model-serving** - Model Serving endpoint management and inference +- **databricks-unity-catalog** - Unity Catalog setup, storage credentials, external locations, and grants ## Prerequisites @@ -25,6 +26,7 @@ For specific products, use dedicated skills: - **If the CLI is missing or outdated (< v0.292.0): STOP. 
Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. Present the install command to the user and ask them to run it in their own terminal. + - **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the `databricks-lakebase` connectivity guide for REST API patterns. 2. **Authenticated**: `databricks auth profiles` - If not: see [CLI Authentication](databricks-cli-auth.md) @@ -134,6 +136,18 @@ databricks bundle run -t --profile | Exploring tables/schemas | [Data Exploration](data-exploration.md) | | Deploying jobs/pipelines | Use `/databricks-dabs` | +## Getting Started (End-to-End) + +For a full local-to-production workflow: + +1. **Install CLI** → [CLI Installation](databricks-cli-install.md) +2. **Authenticate** → [CLI Authentication](databricks-cli-auth.md) → select profile +3. **Discover resources** → `databricks experimental aitools tools get-default-warehouse`, `databricks catalogs list` +4. **Scaffold app** → `databricks apps init --name --features <...> --set <...> --profile ` (see `databricks-apps` skill) +5. **Local dev** → `cd && npm install && npm run dev` +6. **Deploy** → `databricks apps deploy --profile ` +7. 
**Verify** → `databricks apps get --profile ` → check `app_status: RUNNING` + ## Reference Guides - [CLI Installation](databricks-cli-install.md) diff --git a/skills/databricks-jobs/SKILL.md b/skills/databricks-jobs/SKILL.md index f9986c1..125a31c 100644 --- a/skills/databricks-jobs/SKILL.md +++ b/skills/databricks-jobs/SKILL.md @@ -182,6 +182,17 @@ uv run pytest 3. **Run**: `databricks bundle run -t dev --profile ` 4. **Check run status**: `databricks jobs get-run --run-id --profile ` +## Troubleshooting + +| Error | Cause | Fix | +|-------|-------|-----| +| `PERMISSION_DENIED` on deploy | User or SP lacks workspace permissions | Grant `CAN_MANAGE` on the job, or `CAN_MANAGE_RUN` for run-only | +| `RESOURCE_DOES_NOT_EXIST` for notebook | Wrong path or not deployed | Check `notebook_path` is relative to the bundle YAML file that declares the job (e.g., `../src/` from a file in `resources/`); deploy first | +| `TABLE_OR_VIEW_NOT_FOUND` in job run | Catalog/schema not accessible from serverless compute | Verify UC permissions; ensure `catalog` and `schema` params resolve correctly | +| `InvalidParameterValue` for task | Malformed YAML task config | Validate with `databricks bundle validate --strict --profile ` | +| Run stuck in PENDING | No available compute | Check cluster policy quotas or switch to serverless compute | +| Job run fails silently | Task dependency not met | Check `depends_on` chains; a failed upstream task skips downstream tasks | + ## Documentation - Lakeflow Jobs: https://docs.databricks.com/jobs diff --git a/skills/databricks-lakebase/SKILL.md b/skills/databricks-lakebase/SKILL.md index fba9381..af800c6 100644 --- a/skills/databricks-lakebase/SKILL.md +++ b/skills/databricks-lakebase/SKILL.md @@ -250,6 +250,48 @@ Get SP client ID: `databricks apps get --profile ` → `serv **Data API:** PostgREST-compatible HTTP CRUD on Postgres tables. See [connectivity.md](references/connectivity.md). **Synced Tables:** Sync Delta tables into Lakebase. See [synced-tables.md](references/synced-tables.md). 
+## pgvector (Vector Embeddings) + +Lakebase supports the `pgvector` PostgreSQL extension for storing and querying vector embeddings (e.g., from AI Gateway or Model Serving endpoints). + +**Check availability and enable:** +```sql +-- Verify pgvector is available +SELECT * FROM pg_available_extensions WHERE name = 'vector'; + +-- Enable the extension +CREATE EXTENSION IF NOT EXISTS vector; +``` + +**Create a table with vector column:** +```sql +CREATE TABLE items ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding VECTOR(1536) -- dimension must match your embedding model +); +``` + +**Create an index for fast similarity search:** +```sql +-- IVFFlat: faster queries, approximate results (good default) +CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + +-- HNSW: better recall, higher memory usage +CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops); +``` + +**Query by similarity:** +```sql +-- Cosine distance (use <=> operator) +SELECT id, content, embedding <=> $1 AS distance +FROM items +ORDER BY embedding <=> $1 +LIMIT 10; +``` + +To generate embeddings, use AI Gateway or Model Serving endpoints — see the **`databricks-apps`** model-serving guide. + ## Troubleshooting | Error | Solution | diff --git a/skills/databricks-lakebase/references/connectivity.md b/skills/databricks-lakebase/references/connectivity.md index 6ee62e8..077d1ce 100644 --- a/skills/databricks-lakebase/references/connectivity.md +++ b/skills/databricks-lakebase/references/connectivity.md @@ -131,6 +131,56 @@ conn = psycopg.connect( For production apps, combine with Pattern 2's token refresh loop and SQLAlchemy pooling. For the full app development workflow (scaffolding, tRPC, schema init), use the **`databricks-apps`** skill. +### Pattern 5: Off-Platform Apps (TypeScript/Node.js) + +For apps running outside Databricks (external servers, local dev, CI/CD) that connect to Lakebase. 
Uses the `pg` driver with OAuth token from the Databricks REST API. + +**Discover Lakebase endpoint (when CLI is unavailable):** + +If the Databricks CLI is not installed (sandboxed environments, restricted containers), use `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables to call the REST API directly: + +```bash +# List Lakebase projects +curl -s -H "Authorization: Bearer $DATABRICKS_TOKEN" \ + "https://$DATABRICKS_HOST/api/2.0/postgres/projects" | jq . + +# Get endpoint details (host, port) +curl -s -H "Authorization: Bearer $DATABRICKS_TOKEN" \ + "https://$DATABRICKS_HOST/api/2.0/postgres/endpoints/projects//branches//endpoints/" | jq . + +# Generate database credential (OAuth token for Postgres) +curl -s -X POST -H "Authorization: Bearer $DATABRICKS_TOKEN" \ + "https://$DATABRICKS_HOST/api/2.0/postgres/credentials" \ + -d '{"endpoint": "projects//branches//endpoints/"}' | jq . +``` + +Use the results to populate `.env.local` with real values (not placeholders). + +**TypeScript connection with token refresh:** + +```typescript +import pg from "pg"; + +// Token refresh: Lakebase OAuth tokens expire in 1 hour. +// Refresh every 30-40 minutes via background interval. +let currentToken = await generateToken(); +setInterval(async () => { + currentToken = await generateToken(); +}, 30 * 60 * 1000); // 30 minutes + +const pool = new pg.Pool({ + host: process.env.PGHOST, + port: parseInt(process.env.PGPORT || "5432"), + database: process.env.PGDATABASE || "databricks_postgres", + user: process.env.PGUSER, + password: () => currentToken, // dynamic password via callback + ssl: { rejectUnauthorized: false }, // required — see SSL note below + max: 10, +}); +``` + +**⚠️ SSL gotcha:** The `pg` driver's default `ssl: true` sets `rejectUnauthorized: true`, which fails against Lakebase certificates. Use `ssl: { rejectUnauthorized: false }` or pass `sslmode=require` in the connection string. 
This also affects Drizzle ORM: if `drizzle-kit migrate` doesn't support custom SSL options, use Drizzle's programmatic `migrate()` API for finer control over the connection. + ## Best Practices - **Always use `sslmode=require`** — Lakebase requires SSL/TLS on all connections diff --git a/skills/databricks-lakebase/references/synced-tables.md b/skills/databricks-lakebase/references/synced-tables.md index e770e5e..6822b87 100644 --- a/skills/databricks-lakebase/references/synced-tables.md +++ b/skills/databricks-lakebase/references/synced-tables.md @@ -188,9 +188,22 @@ If a Databricks App reads synced tables, the app's Service Principal needs expli ## Lakehouse Sync (Beta, AWS only) -> **Note:** This feature is not yet documented in the official synced tables docs. Verify availability in your workspace. - -Reverse direction: continuously streams changes **from** Lakebase Postgres **into** Unity Catalog Delta tables using CDC. Enables analytics and downstream pipelines on OLTP-written data. Azure support not yet available. +Reverse direction: continuously streams changes **from** Lakebase Postgres **into** Unity Catalog Delta tables using CDC (SCD Type 2 history). Enables analytics and downstream pipelines on OLTP-written data. Does not require external compute, pipelines, or jobs — it is a native Lakebase feature. Azure support not yet available. + +**Lakehouse Sync enablement is a UI-only action** — configured via the "Lakehouse sync" tab in the branch overview, not via CLI or API. It operates at the **schema level**: once enabled, all current and future tables in that schema sync to Unity Catalog as Delta tables. When automating CDC workflows, treat this as a manual post-automation step and inform the user. + +**Prerequisites:** +- `REPLICA IDENTITY FULL` must be set on all source Postgres tables before enabling sync. This requires table ownership. + ```sql + ALTER TABLE . 
REPLICA IDENTITY FULL; + ``` +- Verify replica identity is set: + ```sql + SELECT n.nspname AS schema, c.relname AS table_name, + CASE c.relreplident WHEN 'f' THEN 'full' WHEN 'd' THEN 'default' WHEN 'i' THEN 'index' WHEN 'n' THEN 'nothing' END AS replica_identity + FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE c.relkind = 'r' AND n.nspname = 'public'; + ``` ## Use Cases diff --git a/skills/databricks-pipelines/SKILL.md b/skills/databricks-pipelines/SKILL.md index d08d0b1..25d95d6 100644 --- a/skills/databricks-pipelines/SKILL.md +++ b/skills/databricks-pipelines/SKILL.md @@ -255,6 +255,17 @@ resources: 3. **Run pipeline**: `databricks bundle run -t dev --profile ` 4. **Check status**: `databricks pipelines get --pipeline-id --profile ` +## Troubleshooting + +| Error | Cause | Fix | +|-------|-------|-----| +| `PERMISSION_DENIED` on pipeline run | User or SP lacks pipeline or catalog permissions | Grant `CAN_MANAGE` on pipeline; verify UC catalog/schema grants | +| `Table or view not found` | Target catalog/schema misconfigured or not deployed | Check `catalog` and `target` in pipeline settings; deploy first | +| `UpdateError: Cannot change dataset type` | Tried to change ST→MV or MV→ST | Manually drop the existing table, then deploy and run again | +| Pipeline stuck in STARTING | Compute provisioning issue | Check cluster policy quotas; use serverless compute | +| Full refresh data loss | `full_refresh` on production pipeline deletes and recreates all tables | Use selective refresh (`--refresh
`) unless full refresh is explicitly needed | +| `DeltaStreamIllegalStateException` | Streaming checkpoint corrupted or source schema changed | Try `full_refresh` on the affected table only, or delete checkpoint | + ## Pipeline API Reference Detailed reference guides for each pipeline API. **Read the relevant guide before writing pipeline code.** From ab3b1ddda729744d790aed9237b7e7389e0973de Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 16:11:49 +0200 Subject: [PATCH 2/9] Address review feedback on CAO report improvements - Revert scaffolding bug "Known Issue" (Go template syntax is intentional AppKit design, not a bug) - Drop Unity Catalog skill (separate PR) - Replace raw pg driver + REST API curl patterns with @databricks/lakebase package (standalone, auto token refresh) - Replace detailed pgvector section with brief PostgreSQL extensions note linking to official docs Co-authored-by: Isaac --- manifest.json | 20 ++----- scripts/skills.py | 4 -- skills/databricks-apps/SKILL.md | 8 --- skills/databricks-core/SKILL.md | 1 - skills/databricks-lakebase/SKILL.md | 42 +++------------ .../references/connectivity.md | 54 ++++++------------- 6 files changed, 27 insertions(+), 102 deletions(-) diff --git a/manifest.json b/manifest.json index 421667f..c6f1896 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,12 @@ { "version": "2", - "updated_at": "2026-05-07T13:51:35Z", + "updated_at": "2026-05-07T14:09:14Z", "skills": { "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps development and deployment (evaluates analytics vs synced tables data access)", "experimental": false, - "updated_at": "2026-05-07T13:50:04Z", + "updated_at": "2026-05-07T14:08:13Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -33,7 +33,7 @@ "version": "0.1.0", "description": "Core Databricks skill for CLI, auth, and data exploration", "experimental": false, - "updated_at": "2026-05-07T13:51:29Z", + "updated_at": "2026-05-07T14:08:19Z", "files": [ 
"SKILL.md", "agents/openai.yaml", @@ -78,7 +78,7 @@ "version": "0.1.0", "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API", "experimental": false, - "updated_at": "2026-05-07T13:49:37Z", + "updated_at": "2026-05-07T14:09:06Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -164,18 +164,6 @@ "references/networking-and-security.md", "references/streaming-migration.md" ] - }, - "databricks-unity-catalog": { - "version": "0.1.0", - "description": "Unity Catalog setup: storage credentials, external locations, catalogs, schemas, and grants", - "experimental": true, - "updated_at": "2026-05-07T13:51:07Z", - "files": [ - "SKILL.md", - "agents/openai.yaml", - "assets/databricks.png", - "assets/databricks.svg" - ] } } } diff --git a/scripts/skills.py b/scripts/skills.py index 9d10468..cdfdcf7 100644 --- a/scripts/skills.py +++ b/scripts/skills.py @@ -48,10 +48,6 @@ "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", "experimental": False, }, - "databricks-unity-catalog": { - "description": "Unity Catalog setup: storage credentials, external locations, catalogs, schemas, and grants", - "experimental": True, - }, } diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index 712cd17..515fa9c 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -179,14 +179,6 @@ databricks apps init --features analytics my-app-name databricks apps init --name my-app-name --features analytics --set "..." --profile ``` -### Known Issue: Go Template Syntax in Generated Files - -`databricks apps init` may generate Go template placeholders like `{{.AppName}}` in `.ts`/`.tsx` files instead of the actual app name. After scaffolding, search generated files for `{{` and replace with the actual app name: - -```bash -grep -r '{{' --include='*.ts' --include='*.tsx' . 
-``` - ### Directory Naming `databricks apps init` creates directories in kebab-case matching the app name. diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index 39aefaf..b68096f 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -18,7 +18,6 @@ For specific products, use dedicated skills: - **databricks-apps** - Full-stack TypeScript app development and deployment - **databricks-lakebase** - Lakebase Postgres Autoscaling project management - **databricks-model-serving** - Model Serving endpoint management and inference -- **databricks-unity-catalog** - Unity Catalog setup, storage credentials, external locations, and grants ## Prerequisites diff --git a/skills/databricks-lakebase/SKILL.md b/skills/databricks-lakebase/SKILL.md index af800c6..3cd0e93 100644 --- a/skills/databricks-lakebase/SKILL.md +++ b/skills/databricks-lakebase/SKILL.md @@ -250,48 +250,18 @@ Get SP client ID: `databricks apps get --profile ` → `serv **Data API:** PostgREST-compatible HTTP CRUD on Postgres tables. See [connectivity.md](references/connectivity.md). **Synced Tables:** Sync Delta tables into Lakebase. See [synced-tables.md](references/synced-tables.md). -## pgvector (Vector Embeddings) +## PostgreSQL Extensions -Lakebase supports the `pgvector` PostgreSQL extension for storing and querying vector embeddings (e.g., from AI Gateway or Model Serving endpoints). +Lakebase supports PostgreSQL extensions (e.g., `pgvector` for vector embeddings, `pg_stat_statements` for query statistics). See the [full list of supported extensions](https://docs.databricks.com/aws/en/oltp/projects/extensions). 
-**Check availability and enable:** ```sql --- Verify pgvector is available -SELECT * FROM pg_available_extensions WHERE name = 'vector'; +-- List available extensions +SELECT * FROM pg_available_extensions ORDER BY name; --- Enable the extension -CREATE EXTENSION IF NOT EXISTS vector; +-- Install an extension +CREATE EXTENSION IF NOT EXISTS ; ``` -**Create a table with vector column:** -```sql -CREATE TABLE items ( - id SERIAL PRIMARY KEY, - content TEXT NOT NULL, - embedding VECTOR(1536) -- dimension must match your embedding model -); -``` - -**Create an index for fast similarity search:** -```sql --- IVFFlat: faster queries, approximate results (good default) -CREATE INDEX ON items USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); - --- HNSW: better recall, higher memory usage -CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops); -``` - -**Query by similarity:** -```sql --- Cosine distance (use <=> operator) -SELECT id, content, embedding <=> $1 AS distance -FROM items -ORDER BY embedding <=> $1 -LIMIT 10; -``` - -To generate embeddings, use AI Gateway or Model Serving endpoints — see the **`databricks-apps`** model-serving guide. - ## Troubleshooting | Error | Solution | diff --git a/skills/databricks-lakebase/references/connectivity.md b/skills/databricks-lakebase/references/connectivity.md index 077d1ce..498b9d5 100644 --- a/skills/databricks-lakebase/references/connectivity.md +++ b/skills/databricks-lakebase/references/connectivity.md @@ -133,53 +133,33 @@ For production apps, combine with Pattern 2's token refresh loop and SQLAlchemy ### Pattern 5: Off-Platform Apps (TypeScript/Node.js) -For apps running outside Databricks (external servers, local dev, CI/CD) that connect to Lakebase. Uses the `pg` driver with OAuth token from the Databricks REST API. 
- -**Discover Lakebase endpoint (when CLI is unavailable):** - -If the Databricks CLI is not installed (sandboxed environments, restricted containers), use `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables to call the REST API directly: +For apps running outside Databricks (external servers, local dev, CI/CD) that connect to Lakebase. Use the `@databricks/lakebase` package — it works standalone without AppKit. ```bash -# List Lakebase projects -curl -s -H "Authorization: Bearer $DATABRICKS_TOKEN" \ - "https://$DATABRICKS_HOST/api/2.0/postgres/projects" | jq . - -# Get endpoint details (host, port) -curl -s -H "Authorization: Bearer $DATABRICKS_TOKEN" \ - "https://$DATABRICKS_HOST/api/2.0/postgres/endpoints/projects//branches//endpoints/" | jq . - -# Generate database credential (OAuth token for Postgres) -curl -s -X POST -H "Authorization: Bearer $DATABRICKS_TOKEN" \ - "https://$DATABRICKS_HOST/api/2.0/postgres/credentials" \ - -d '{"endpoint": "projects//branches//endpoints/"}' | jq . +npm install @databricks/lakebase ``` -Use the results to populate `.env.local` with real values (not placeholders). - -**TypeScript connection with token refresh:** - ```typescript -import pg from "pg"; - -// Token refresh: Lakebase OAuth tokens expire in 1 hour. -// Refresh every 30-40 minutes via background interval. 
-let currentToken = await generateToken(); -setInterval(async () => { - currentToken = await generateToken(); -}, 30 * 60 * 1000); // 30 minutes +import { createLakebasePool } from "@databricks/lakebase"; -const pool = new pg.Pool({ +const pool = createLakebasePool({ host: process.env.PGHOST, - port: parseInt(process.env.PGPORT || "5432"), - database: process.env.PGDATABASE || "databricks_postgres", - user: process.env.PGUSER, - password: () => currentToken, // dynamic password via callback - ssl: { rejectUnauthorized: false }, // required — see SSL note below - max: 10, + database: process.env.PGDATABASE, + endpoint: process.env.LAKEBASE_ENDPOINT, + // Authentication: reads from DATABRICKS_HOST + DATABRICKS_TOKEN, + // .databrickscfg, or explicit workspaceClient }); + +// Returns a standard pg.Pool — works with Drizzle, Prisma, or any PostgreSQL library +const { rows } = await pool.query("SELECT * FROM my_table LIMIT 10"); ``` -**⚠️ SSL gotcha:** The `pg` driver's default `ssl: true` sets `rejectUnauthorized: true`, which fails against Lakebase certificates. Use `ssl: { rejectUnauthorized: false }` or pass `sslmode=require` in the connection string. This also affects Drizzle ORM: if `drizzle-kit migrate` doesn't support custom SSL options, use Drizzle's programmatic `migrate()` API for finer control over the connection. +**What `@databricks/lakebase` handles automatically:** +- **OAuth token refresh** — tokens expire after 1 hour; the package refreshes 2 minutes before expiry with request deduplication +- **SSL** — defaults to `sslmode=require` +- **Connection pooling** — configurable `max`, `idleTimeoutMillis`, `connectionTimeoutMillis` + +**Authentication chain** (in order): explicit `workspaceClient` → Databricks SDK default auth (`DATABRICKS_HOST` + `DATABRICKS_TOKEN`, `.databrickscfg`) → `currentUser.me()` API fallback. For native Postgres password auth (bypassing OAuth), pass `password` directly. 
## Best Practices From a65a3427c00a6c446281a4512dc84461975ae6be Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 16:29:50 +0200 Subject: [PATCH 3/9] Fix skill bugs: Lakebase API pattern, Genie resource, CLI versions - Replace outdated createLakebasePool() + pool.query() pattern with AppKit plugin pattern: AppKit.lakebase.query() - Fix Genie databricks.yml: remove nonexistent `name` field and `genie_space_name` variable from genie_space resource - Add missing user_api_scopes (files.files) for files plugin - Improve model serving streaming docs (SSE proxy, not AI SDK) - Bump CLI version to >= v0.296.0 across all 7 skills for consistency - Add multi-environment deploy note in Lakebase scaffolding Co-authored-by: Isaac --- skills/databricks-apps/SKILL.md | 2 +- .../references/appkit/files.md | 10 ++++ .../references/appkit/genie.md | 4 -- .../references/appkit/lakebase.md | 60 +++++++++---------- .../references/appkit/model-serving.md | 7 ++- skills/databricks-core/SKILL.md | 4 +- skills/databricks-jobs/SKILL.md | 2 +- skills/databricks-lakebase/SKILL.md | 2 +- skills/databricks-model-serving/SKILL.md | 2 +- skills/databricks-pipelines/SKILL.md | 2 +- .../databricks-serverless-migration/SKILL.md | 2 +- 11 files changed, 50 insertions(+), 47 deletions(-) diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index 515fa9c..2f94340 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-apps description: "Build apps on Databricks Apps platform. Use when asked to create dashboards, data apps, analytics tools, or visualizations. Evaluates data access patterns (analytics vs Lakebase synced tables) before scaffolding. Invoke BEFORE starting implementation." 
-compatibility: Requires databricks CLI (>= v0.294.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.1" parent: databricks-core diff --git a/skills/databricks-apps/references/appkit/files.md b/skills/databricks-apps/references/appkit/files.md index 3432f09..aef6cb9 100644 --- a/skills/databricks-apps/references/appkit/files.md +++ b/skills/databricks-apps/references/appkit/files.md @@ -244,6 +244,16 @@ resources: permission: WRITE_VOLUME ``` +The files plugin requires OBO user token passthrough for user-level file access. Add `user_api_scopes` to `databricks.yml`: + +```yaml +resources: + apps: + my_app: + user_api_scopes: + - files.files +``` + Wire the env var in `app.yaml`: ```yaml diff --git a/skills/databricks-apps/references/appkit/genie.md b/skills/databricks-apps/references/appkit/genie.md index 481fa66..aab6e5b 100644 --- a/skills/databricks-apps/references/appkit/genie.md +++ b/skills/databricks-apps/references/appkit/genie.md @@ -81,8 +81,6 @@ npm install && npm run dev variables: genie_space_id: description: Genie Space ID - genie_space_name: - description: Genie Space name resources: apps: @@ -91,7 +89,6 @@ resources: # ... existing resources ... - name: genie-space genie_space: - name: ${var.genie_space_name} space_id: ${var.genie_space_id} permission: CAN_RUN @@ -99,7 +96,6 @@ targets: default: variables: genie_space_id: - genie_space_name: ``` **`app.yaml`** — add env injection: diff --git a/skills/databricks-apps/references/appkit/lakebase.md b/skills/databricks-apps/references/appkit/lakebase.md index c7ad167..cf42bc3 100644 --- a/skills/databricks-apps/references/appkit/lakebase.md +++ b/skills/databricks-apps/references/appkit/lakebase.md @@ -37,6 +37,8 @@ Where `` and `` are full resource names (e.g. `proje Use the `databricks-lakebase` skill to create a Lakebase project and discover branch/database resource names before running this command. 
+> For multi-environment deployments (dev/prod), use `variables:` and `targets:` blocks in `databricks.yml` — see the **`databricks-dabs`** skill for patterns. + **Get resource names** (if you have an existing project): ```bash # List branches → use the name field of a READY branch @@ -60,25 +62,22 @@ my-app/ Note: **No `config/queries/` directory** — Lakebase apps use server-side `pool.query()` calls, not SQL files. -## `createLakebasePool` API +## Lakebase Plugin API + +Access Lakebase through the plugin handle returned by `createApp()`: ```typescript -import { createLakebasePool } from "@databricks/lakebase"; -// or: import { createLakebasePool } from "@databricks/appkit"; - -const pool = createLakebasePool({ - // All fields optional — auto-populated from env vars when deployed - host: process.env.PGHOST, // Lakebase hostname - database: process.env.PGDATABASE, // Database name - endpoint: process.env.LAKEBASE_ENDPOINT, // Endpoint resource path - user: process.env.PGUSER, // Service principal client ID - max: 10, // Connection pool size - idleTimeoutMillis: 30000, - connectionTimeoutMillis: 10000, +import { createApp, server, lakebase } from "@databricks/appkit"; + +const AppKit = await createApp({ + plugins: [server(), lakebase()], }); + +// Query via the plugin handle — handles pooling and token refresh automatically +const result = await AppKit.lakebase.query("SELECT * FROM users WHERE id = $1", [userId]); ``` -Call `createLakebasePool()` **once at module level** (server startup), not inside request handlers. +The `lakebase()` plugin auto-configures from platform-injected env vars at deploy time. No manual pool setup needed. ## Environment Variables (auto-set when deployed with database resource) @@ -93,42 +92,39 @@ Call `createLakebasePool()` **once at module level** (server startup), not insid ## tRPC CRUD Pattern -Always use tRPC for Lakebase operations — do NOT call `pool.query()` from the client. 
+Always use tRPC for Lakebase operations — do NOT call `AppKit.lakebase.query()` from the client. ```typescript // server/server.ts -import { initTRPC } from '@trpc/server'; -import { createLakebasePool } from "@databricks/lakebase"; -import { z } from 'zod'; -import superjson from 'superjson'; // requires: npm install superjson - -const pool = createLakebasePool(); // reads env vars automatically +import { createApp, server, lakebase } from "@databricks/appkit"; -const t = initTRPC.create({ transformer: superjson }); -const publicProcedure = t.procedure; +const AppKit = await createApp({ + plugins: [server(), lakebase()], +}); -export const appRouter = t.router({ - listItems: publicProcedure.query(async () => { - const { rows } = await pool.query( +// Define routes using AppKit.lakebase.query() +AppKit.server.router({ + listItems: AppKit.server.procedure.query(async () => { + const { rows } = await AppKit.lakebase.query( "SELECT * FROM app_data.items ORDER BY created_at DESC LIMIT 100" ); return rows; }), - createItem: publicProcedure + createItem: AppKit.server.procedure .input(z.object({ name: z.string().min(1) })) .mutation(async ({ input }) => { - const { rows } = await pool.query( + const { rows } = await AppKit.lakebase.query( "INSERT INTO app_data.items (name) VALUES ($1) RETURNING *", [input.name] ); return rows[0]; }), - deleteItem: publicProcedure + deleteItem: AppKit.server.procedure .input(z.object({ id: z.number() })) .mutation(async ({ input }) => { - await pool.query("DELETE FROM app_data.items WHERE id = $1", [input.id]); + await AppKit.lakebase.query("DELETE FROM app_data.items WHERE id = $1", [input.id]); return { success: true }; }), }); @@ -142,7 +138,7 @@ export const appRouter = t.router({ ```typescript // server/server.ts — run once at startup before handling requests -await pool.query(` +await AppKit.lakebase.query(` CREATE SCHEMA IF NOT EXISTS app_data; CREATE TABLE IF NOT EXISTS app_data.items ( id SERIAL PRIMARY KEY, @@ -154,7 +150,7 @@ 
await pool.query(` ## ORM Integration (Optional) -The pool returned by `createLakebasePool()` is a standard `pg.Pool` — works with any PostgreSQL library: +The underlying pool is a standard `pg.Pool` — works with any PostgreSQL library: ```typescript // Drizzle ORM diff --git a/skills/databricks-apps/references/appkit/model-serving.md b/skills/databricks-apps/references/appkit/model-serving.md index 861ee4b..86d2f79 100644 --- a/skills/databricks-apps/references/appkit/model-serving.md +++ b/skills/databricks-apps/references/appkit/model-serving.md @@ -96,11 +96,12 @@ const answer = result.choices?.[0]?.message?.content; ## Streaming Chat Pattern -For real-time token streaming in a chat UI, the serving endpoint must be called with streaming enabled. The Databricks Apps reverse proxy enforces a **120-second timeout** on HTTP requests, so streaming responses must complete within that window. +AppKit's serving plugin provides **transparent SSE streaming** — it proxies the upstream endpoint's Server-Sent Events response directly to the client without buffering. No client-side AI SDK is needed for basic streaming. 
-- For streaming API details, check `npx @databricks/appkit docs` for the latest streaming support in AppKit -- If AppKit doesn't provide a built-in streaming pattern, use the OpenAI-compatible streaming API via `fetch` with `stream: true` in the request body and process the SSE response +- The proxy handles SSE headers and pipes the response stream +- The Databricks Apps reverse proxy enforces a **120-second timeout** — streaming responses must complete within that window - For interactions exceeding 120 seconds, use **WebSockets** instead of SSE — see [Platform Guide](../platform-guide.md) +- For the latest streaming API details: `npx @databricks/appkit docs` ## AI Gateway & Embeddings diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index b68096f..a85d56c 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -1,7 +1,7 @@ --- name: "databricks-core" description: "Databricks CLI operations: auth, profiles, data exploration, and bundles. Contains up-to-date guidelines for Databricks-related CLI tasks." -compatibility: Requires databricks CLI (>= v0.292.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" --- @@ -22,7 +22,7 @@ For specific products, use dedicated skills: ## Prerequisites 1. **CLI installed**: Run `databricks --version` to check. - - **If the CLI is missing or outdated (< v0.292.0): STOP. Do not proceed or work around a missing CLI.** + - **If the CLI is missing or outdated (< v0.296.0): STOP. Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. Present the install command to the user and ask them to run it in their own terminal. 
- **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the `databricks-lakebase` connectivity guide for REST API patterns. diff --git a/skills/databricks-jobs/SKILL.md b/skills/databricks-jobs/SKILL.md index 125a31c..ce50030 100644 --- a/skills/databricks-jobs/SKILL.md +++ b/skills/databricks-jobs/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-jobs description: Develop and deploy Lakeflow Jobs on Databricks. Use when creating data engineering jobs with notebooks, Python wheels, or SQL tasks. Invoke BEFORE starting implementation. -compatibility: Requires databricks CLI (>= v0.292.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-lakebase/SKILL.md b/skills/databricks-lakebase/SKILL.md index 3cd0e93..4443100 100644 --- a/skills/databricks-lakebase/SKILL.md +++ b/skills/databricks-lakebase/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-lakebase description: "Databricks Lakebase Postgres: projects, scaling, connectivity, Lakebase synced tables, and Data API. Use when asked about Lakebase databases, OLTP storage, or connecting apps to Postgres on Databricks." -compatibility: Requires databricks CLI (>= v0.294.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-model-serving/SKILL.md b/skills/databricks-model-serving/SKILL.md index eafedbf..2945b3e 100644 --- a/skills/databricks-model-serving/SKILL.md +++ b/skills/databricks-model-serving/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-model-serving description: "Manage Databricks Model Serving endpoints via CLI. 
Use when asked to create, configure, query, or manage model serving endpoints for LLM inference, custom models, or external models." -compatibility: Requires databricks CLI (>= v0.294.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-pipelines/SKILL.md b/skills/databricks-pipelines/SKILL.md index 25d95d6..4f12845 100644 --- a/skills/databricks-pipelines/SKILL.md +++ b/skills/databricks-pipelines/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-pipelines description: Develop Lakeflow Spark Declarative Pipelines (formerly Delta Live Tables) on Databricks. Use when building batch or streaming data pipelines with Python or SQL. Invoke BEFORE starting implementation. -compatibility: Requires databricks CLI (>= v0.292.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-serverless-migration/SKILL.md b/skills/databricks-serverless-migration/SKILL.md index 859d01a..76f10f3 100644 --- a/skills/databricks-serverless-migration/SKILL.md +++ b/skills/databricks-serverless-migration/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-serverless-migration description: "Migrate Databricks workloads from classic compute to serverless compute. Scans code for serverless compatibility issues, provides concrete fixes for the serverless Spark Connect architecture, and guides the full migration to serverless environments. Use for classic-to-serverless migrations, serverless code compatibility checks, or writing new serverless-compatible notebooks and jobs. Not for classic DBR version upgrades or cluster configuration changes within classic compute." 
-compatibility: Requires databricks CLI (>= v0.292.0) +compatibility: Requires databricks CLI (>= v0.296.0) metadata: version: "0.1.0" parent: databricks-core From d79c945ca789ab231dcedc781577c8681accfb3c Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 17:20:15 +0200 Subject: [PATCH 4/9] Revert incorrect changes from previous commits - Revert redundant user_api_scopes in files.md (auto-generated by apps init) - Restore genie_space_name variable (confirmed in actual scaffolding output) - Replace multi-space Genie section with pointer to AppKit docs - Revert Jobs and Pipelines troubleshooting tables (not report-driven) - Revert CLI version bumps to original values - Update connectivity.md cross-reference to match new plugin pattern Co-authored-by: Isaac --- skills/databricks-apps/SKILL.md | 2 +- .../references/appkit/files.md | 10 ---- .../references/appkit/genie.md | 53 ++----------------- skills/databricks-core/SKILL.md | 4 +- skills/databricks-jobs/SKILL.md | 13 +---- skills/databricks-lakebase/SKILL.md | 2 +- .../references/connectivity.md | 2 +- skills/databricks-model-serving/SKILL.md | 2 +- skills/databricks-pipelines/SKILL.md | 13 +---- .../databricks-serverless-migration/SKILL.md | 2 +- 10 files changed, 14 insertions(+), 89 deletions(-) diff --git a/skills/databricks-apps/SKILL.md b/skills/databricks-apps/SKILL.md index 2f94340..515fa9c 100644 --- a/skills/databricks-apps/SKILL.md +++ b/skills/databricks-apps/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-apps description: "Build apps on Databricks Apps platform. Use when asked to create dashboards, data apps, analytics tools, or visualizations. Evaluates data access patterns (analytics vs Lakebase synced tables) before scaffolding. Invoke BEFORE starting implementation." 
-compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.294.0) metadata: version: "0.1.1" parent: databricks-core diff --git a/skills/databricks-apps/references/appkit/files.md b/skills/databricks-apps/references/appkit/files.md index aef6cb9..3432f09 100644 --- a/skills/databricks-apps/references/appkit/files.md +++ b/skills/databricks-apps/references/appkit/files.md @@ -244,16 +244,6 @@ resources: permission: WRITE_VOLUME ``` -The files plugin requires OBO user token passthrough for user-level file access. Add `user_api_scopes` to `databricks.yml`: - -```yaml -resources: - apps: - my_app: - user_api_scopes: - - files.files -``` - Wire the env var in `app.yaml`: ```yaml diff --git a/skills/databricks-apps/references/appkit/genie.md b/skills/databricks-apps/references/appkit/genie.md index aab6e5b..7650090 100644 --- a/skills/databricks-apps/references/appkit/genie.md +++ b/skills/databricks-apps/references/appkit/genie.md @@ -81,6 +81,8 @@ npm install && npm run dev variables: genie_space_id: description: Genie Space ID + genie_space_name: + description: Genie Space name resources: apps: @@ -89,6 +91,7 @@ resources: # ... existing resources ... - name: genie-space genie_space: + name: ${var.genie_space_name} space_id: ${var.genie_space_id} permission: CAN_RUN @@ -96,6 +99,7 @@ targets: default: variables: genie_space_id: + genie_space_name: ``` **`app.yaml`** — add env injection: @@ -141,54 +145,7 @@ function GeniePage() { Update smoke tests if headings or routes changed, then `databricks apps validate`. 
-## Multiple Genie Spaces - -To build an app that lets users switch between multiple Genie spaces (e.g., different datasets or domains): - -**`databricks.yml`** — declare multiple Genie resources with distinct aliases: - -```yaml -variables: - genie_space_sales_id: - description: Sales Genie Space ID - genie_space_support_id: - description: Support Genie Space ID - -resources: - apps: - app: - resources: - - name: genie-sales - genie_space: - space_id: ${var.genie_space_sales_id} - permission: CAN_RUN - - name: genie-support - genie_space: - space_id: ${var.genie_space_support_id} - permission: CAN_RUN -``` - -**`app.yaml`** — inject each space ID as a separate env var: - -```yaml -env: - - name: GENIE_SPACE_SALES - valueFrom: genie-sales - - name: GENIE_SPACE_SUPPORT - valueFrom: genie-support -``` - -**`server/server.ts`** — register the genie plugin once; it reads space IDs from env vars. Each alias becomes a separate `/api/genie/:alias/messages` endpoint. The client-side space selector routes messages to the correct alias. - -**Frontend** — build a selector that switches between spaces: - -```tsx -const spaces = [ - { alias: "sales", label: "Sales Analytics" }, - { alias: "support", label: "Support Metrics" }, -]; -// Route to /api/genie/{alias}/messages based on user selection -``` +For multi-space apps (switching between Genie spaces), see `npx @databricks/appkit docs ./docs/plugins/genie.md`. ## Frontend diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index a85d56c..b68096f 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -1,7 +1,7 @@ --- name: "databricks-core" description: "Databricks CLI operations: auth, profiles, data exploration, and bundles. Contains up-to-date guidelines for Databricks-related CLI tasks." 
-compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.292.0) metadata: version: "0.1.0" --- @@ -22,7 +22,7 @@ For specific products, use dedicated skills: ## Prerequisites 1. **CLI installed**: Run `databricks --version` to check. - - **If the CLI is missing or outdated (< v0.296.0): STOP. Do not proceed or work around a missing CLI.** + - **If the CLI is missing or outdated (< v0.292.0): STOP. Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. Present the install command to the user and ask them to run it in their own terminal. - **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the `databricks-lakebase` connectivity guide for REST API patterns. diff --git a/skills/databricks-jobs/SKILL.md b/skills/databricks-jobs/SKILL.md index ce50030..f9986c1 100644 --- a/skills/databricks-jobs/SKILL.md +++ b/skills/databricks-jobs/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-jobs description: Develop and deploy Lakeflow Jobs on Databricks. Use when creating data engineering jobs with notebooks, Python wheels, or SQL tasks. Invoke BEFORE starting implementation. -compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.292.0) metadata: version: "0.1.0" parent: databricks-core @@ -182,17 +182,6 @@ uv run pytest 3. **Run**: `databricks bundle run -t dev --profile ` 4. 
**Check run status**: `databricks jobs get-run --run-id --profile ` -## Troubleshooting - -| Error | Cause | Fix | -|-------|-------|-----| -| `PERMISSION_DENIED` on deploy | User or SP lacks workspace permissions | Grant `CAN_MANAGE` on the job, or `CAN_MANAGE_RUN` for run-only | -| `RESOURCE_DOES_NOT_EXIST` for notebook | Wrong path or not deployed | Check `notebook_path` is relative to bundle root (`../src/`); deploy first | -| `TABLE_OR_VIEW_NOT_FOUND` in job run | Catalog/schema not accessible from serverless compute | Verify UC permissions; ensure `catalog` and `schema` params resolve correctly | -| `InvalidParameterValue` for task | Malformed YAML task config | Validate with `databricks bundle validate --strict --profile ` | -| Run stuck in PENDING | No available compute | Check cluster policy quotas or switch to serverless compute | -| Job run fails silently | Task dependency not met | Check `depends_on` chains; a failed upstream task skips downstream tasks | - ## Documentation - Lakeflow Jobs: https://docs.databricks.com/jobs diff --git a/skills/databricks-lakebase/SKILL.md b/skills/databricks-lakebase/SKILL.md index 4443100..3cd0e93 100644 --- a/skills/databricks-lakebase/SKILL.md +++ b/skills/databricks-lakebase/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-lakebase description: "Databricks Lakebase Postgres: projects, scaling, connectivity, Lakebase synced tables, and Data API. Use when asked about Lakebase databases, OLTP storage, or connecting apps to Postgres on Databricks." 
-compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.294.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-lakebase/references/connectivity.md b/skills/databricks-lakebase/references/connectivity.md index 498b9d5..8ec356d 100644 --- a/skills/databricks-lakebase/references/connectivity.md +++ b/skills/databricks-lakebase/references/connectivity.md @@ -11,7 +11,7 @@ ## Connection Patterns (Python) -> **JavaScript/TypeScript Databricks Apps** using AppKit get Lakebase connectivity fully auto-injected via `createLakebasePool()` — see the **`databricks-apps`** skill. +> **JavaScript/TypeScript Databricks Apps** using AppKit get Lakebase connectivity via the `lakebase()` plugin — see the **`databricks-apps`** skill's [Lakebase guide](../../databricks-apps/references/appkit/lakebase.md). ### Pattern 1: Direct Connection (Scripts/Notebooks) diff --git a/skills/databricks-model-serving/SKILL.md b/skills/databricks-model-serving/SKILL.md index 2945b3e..eafedbf 100644 --- a/skills/databricks-model-serving/SKILL.md +++ b/skills/databricks-model-serving/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-model-serving description: "Manage Databricks Model Serving endpoints via CLI. Use when asked to create, configure, query, or manage model serving endpoints for LLM inference, custom models, or external models." -compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.294.0) metadata: version: "0.1.0" parent: databricks-core diff --git a/skills/databricks-pipelines/SKILL.md b/skills/databricks-pipelines/SKILL.md index 4f12845..d08d0b1 100644 --- a/skills/databricks-pipelines/SKILL.md +++ b/skills/databricks-pipelines/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-pipelines description: Develop Lakeflow Spark Declarative Pipelines (formerly Delta Live Tables) on Databricks. Use when building batch or streaming data pipelines with Python or SQL. 
Invoke BEFORE starting implementation. -compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.292.0) metadata: version: "0.1.0" parent: databricks-core @@ -255,17 +255,6 @@ resources: 3. **Run pipeline**: `databricks bundle run -t dev --profile ` 4. **Check status**: `databricks pipelines get --pipeline-id --profile ` -## Troubleshooting - -| Error | Cause | Fix | -|-------|-------|-----| -| `PERMISSION_DENIED` on pipeline run | User or SP lacks pipeline or catalog permissions | Grant `CAN_MANAGE` on pipeline; verify UC catalog/schema grants | -| `Table or view not found` | Target catalog/schema misconfigured or not deployed | Check `catalog` and `target` in pipeline settings; deploy first | -| `UpdateError: Cannot change dataset type` | Tried to change ST→MV or MV→ST | Manually drop the existing table, then deploy and run again | -| Pipeline stuck in STARTING | Compute provisioning issue | Check cluster policy quotas; use serverless compute | -| Full refresh data loss | `full_refresh` on production pipeline deletes and recreates all tables | Use selective refresh (`--refresh
`) unless full refresh is explicitly needed | -| `DeltaStreamIllegalStateException` | Streaming checkpoint corrupted or source schema changed | Try `full_refresh` on the affected table only, or delete checkpoint | - ## Pipeline API Reference Detailed reference guides for each pipeline API. **Read the relevant guide before writing pipeline code.** diff --git a/skills/databricks-serverless-migration/SKILL.md b/skills/databricks-serverless-migration/SKILL.md index 76f10f3..859d01a 100644 --- a/skills/databricks-serverless-migration/SKILL.md +++ b/skills/databricks-serverless-migration/SKILL.md @@ -1,7 +1,7 @@ --- name: databricks-serverless-migration description: "Migrate Databricks workloads from classic compute to serverless compute. Scans code for serverless compatibility issues, provides concrete fixes for the serverless Spark Connect architecture, and guides the full migration to serverless environments. Use for classic-to-serverless migrations, serverless code compatibility checks, or writing new serverless-compatible notebooks and jobs. Not for classic DBR version upgrades or cluster configuration changes within classic compute." 
-compatibility: Requires databricks CLI (>= v0.296.0) +compatibility: Requires databricks CLI (>= v0.292.0) metadata: version: "0.1.0" parent: databricks-core From 3fd16cb715bdfa52b5ea3f09799f14dcd28c2c0c Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 18:00:20 +0200 Subject: [PATCH 5/9] Polish: scaffolding note, ORM via plugin, docs pointers - Clarify Lakebase plugin pattern requires scaffolding first - Update ORM integration to use AppKit.lakebase.pool / getOrmConfig() - Fix stale pool.query() references in synced tables section - Replace streaming/AI Gateway sections with AppKit docs pointer - Add AI Gateway note linking to official docs - Remove app-focused Getting Started from databricks-core Co-authored-by: Isaac --- .../references/appkit/lakebase.md | 16 +++++++++------- .../references/appkit/model-serving.md | 17 ++--------------- skills/databricks-core/SKILL.md | 12 ------------ 3 files changed, 11 insertions(+), 34 deletions(-) diff --git a/skills/databricks-apps/references/appkit/lakebase.md b/skills/databricks-apps/references/appkit/lakebase.md index cf42bc3..92554e1 100644 --- a/skills/databricks-apps/references/appkit/lakebase.md +++ b/skills/databricks-apps/references/appkit/lakebase.md @@ -64,7 +64,7 @@ Note: **No `config/queries/` directory** — Lakebase apps use server-side `pool ## Lakebase Plugin API -Access Lakebase through the plugin handle returned by `createApp()`: +Scaffolding with `--features lakebase` (see above) generates this pattern. 
Access Lakebase through the plugin handle returned by `createApp()`: ```typescript import { createApp, server, lakebase } from "@databricks/appkit"; @@ -150,26 +150,28 @@ await AppKit.lakebase.query(` ## ORM Integration (Optional) -The underlying pool is a standard `pg.Pool` — works with any PostgreSQL library: +The plugin exposes the raw `pg.Pool` via `AppKit.lakebase.pool` — works with any PostgreSQL library: ```typescript // Drizzle ORM import { drizzle } from "drizzle-orm/node-postgres"; -const db = drizzle(pool); +const db = drizzle(AppKit.lakebase.pool); // Prisma (with @prisma/adapter-pg) import { PrismaPg } from "@prisma/adapter-pg"; -const adapter = new PrismaPg(pool); +const adapter = new PrismaPg(AppKit.lakebase.pool); const prisma = new PrismaClient({ adapter }); ``` +For ORM-compatible config: `AppKit.lakebase.getOrmConfig()`. + ## Reading from Lakebase synced tables Lakebase synced tables materialize Delta/UC tables into Lakebase Postgres for low-latency app reads. The lakehouse remains the source of truth; Lakebase serves as a read-optimized index. **Architecture:** ``` -Delta gold tables → Synced tables (read-only) → App reads via pool.query() +Delta gold tables → Synced tables (read-only) → App reads via AppKit.lakebase.query() App writes → Lakebase OLTP tables → optional Lakehouse Sync → Delta ``` @@ -179,7 +181,7 @@ App writes → Lakebase OLTP tables → optional Lakehouse Sync ### How It Works -Synced tables (created via `databricks postgres create-synced-table`) appear as regular Postgres tables. From the app's perspective, use the same `pool.query()` pattern but **read-only**. +Synced tables (created via `databricks postgres create-synced-table`) appear as regular Postgres tables. From the app's perspective, use the same `AppKit.lakebase.query()` pattern but **read-only**. 
**Key differences from CRUD tables:** @@ -197,7 +199,7 @@ Synced tables (created via `databricks postgres create-synced-table`) appear as ```typescript topPickups: publicProcedure.query(async () => { - const { rows } = await pool.query(` + const { rows } = await AppKit.lakebase.query(` SELECT pickup_zip, COUNT(*) AS trip_count, AVG(fare_amount) AS avg_fare FROM public.nyc_trips GROUP BY pickup_zip diff --git a/skills/databricks-apps/references/appkit/model-serving.md b/skills/databricks-apps/references/appkit/model-serving.md index 86d2f79..586e19e 100644 --- a/skills/databricks-apps/references/appkit/model-serving.md +++ b/skills/databricks-apps/references/appkit/model-serving.md @@ -94,22 +94,9 @@ const result = await trpc.queryModel.query({ prompt: userInput }); const answer = result.choices?.[0]?.message?.content; ``` -## Streaming Chat Pattern +For streaming and advanced patterns, see `npx @databricks/appkit docs ./docs/plugins/model-serving.md`. -AppKit's serving plugin provides **transparent SSE streaming** — it proxies the upstream endpoint's Server-Sent Events response directly to the client without buffering. No client-side AI SDK is needed for basic streaming. - -- The proxy handles SSE headers and pipes the response stream -- The Databricks Apps reverse proxy enforces a **120-second timeout** — streaming responses must complete within that window -- For interactions exceeding 120 seconds, use **WebSockets** instead of SSE — see [Platform Guide](../platform-guide.md) -- For the latest streaming API details: `npx @databricks/appkit docs` - -## AI Gateway & Embeddings - -AI Gateway foundation model endpoints (available in the `system.ai` catalog) are called the same way as custom serving endpoints — use the tRPC pattern above with the endpoint name. - -**Embeddings vs chat:** Embedding endpoints use a different request shape — `input` field instead of `messages`. 
Use `databricks serving-endpoints get-open-api ` from the `databricks-model-serving` skill to discover the expected input/output schema for any endpoint. - -**Storing embeddings:** For similarity search, store embeddings in Lakebase with pgvector — see the **`databricks-lakebase`** skill's pgvector section for `VECTOR` column types, indexes, and query patterns. +AppKit integrates with **Model Serving endpoints**. To add AI Gateway features (rate limits, usage tracking, guardrails), configure them on the Model Serving endpoint — see [AI Gateway docs](https://docs.databricks.com/aws/en/ai-gateway/overview-beta). ## Troubleshooting diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index b68096f..16dec79 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -135,18 +135,6 @@ databricks bundle run -t --profile | Exploring tables/schemas | [Data Exploration](data-exploration.md) | | Deploying jobs/pipelines | Use `/databricks-dabs` | -## Getting Started (End-to-End) - -For a full local-to-production workflow: - -1. **Install CLI** → [CLI Installation](databricks-cli-install.md) -2. **Authenticate** → [CLI Authentication](databricks-cli-auth.md) → select profile -3. **Discover resources** → `databricks experimental aitools tools get-default-warehouse`, `databricks catalogs list` -4. **Scaffold app** → `databricks apps init --name --features <...> --set <...> --profile ` (see `databricks-apps` skill) -5. **Local dev** → `cd && npm install && npm run dev` -6. **Deploy** → `databricks apps deploy --profile ` -7. 
**Verify** → `databricks apps get --profile ` → check `app_status: RUNNING` - ## Reference Guides - [CLI Installation](databricks-cli-install.md) From 293c02508567971ce121a0e4d260542dd983a802 Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 18:12:26 +0200 Subject: [PATCH 6/9] Fix AI Gateway link, trim Pattern 5, remove OBO note - Remove misleading OBO vs SP-only note (apps init handles scopes) - Fix AI Gateway: note beta endpoints unsupported, point to databricks-model-serving skill instead of incompatible docs - Fix CLI exception: link to REST API docs, not Lakebase skill - Trim off-platform Pattern 5 to minimal example + npm view readme Co-authored-by: Isaac --- .../references/appkit/model-serving.md | 2 +- .../references/platform-guide.md | 2 -- skills/databricks-core/SKILL.md | 2 +- .../references/connectivity.md | 23 +++---------------- 4 files changed, 5 insertions(+), 24 deletions(-) diff --git a/skills/databricks-apps/references/appkit/model-serving.md b/skills/databricks-apps/references/appkit/model-serving.md index 586e19e..967f8cb 100644 --- a/skills/databricks-apps/references/appkit/model-serving.md +++ b/skills/databricks-apps/references/appkit/model-serving.md @@ -96,7 +96,7 @@ const answer = result.choices?.[0]?.message?.content; For streaming and advanced patterns, see `npx @databricks/appkit docs ./docs/plugins/model-serving.md`. -AppKit integrates with **Model Serving endpoints**. To add AI Gateway features (rate limits, usage tracking, guardrails), configure them on the Model Serving endpoint — see [AI Gateway docs](https://docs.databricks.com/aws/en/ai-gateway/overview-beta). +AppKit integrates with **Model Serving endpoints**. AI Gateway (beta) endpoints are not directly supported — use the underlying Model Serving endpoint name instead. AI Gateway features (rate limits, usage tracking) can be configured on Model Serving endpoints via the `databricks-model-serving` skill. 
## Troubleshooting diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index 66e5a4e..8b46658 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -101,8 +101,6 @@ env: ⚠️ Databricks blocks access outside approved scopes even if the user has permission. -**When to use OBO vs SP-only:** Most apps work fine with SP-only authentication. Only add `user_api_scopes` when the app needs to act as the logged-in user (e.g., user-scoped file access, per-user Genie queries). If unsure, start without it — you can add OBO scopes later if needed. Adding `user_api_scopes` requires the workspace to have user token passthrough enabled; if it's not enabled, `bundle deploy` will fail. - ## Deployment Workflow ⚠️ **USER CONSENT REQUIRED** — always confirm with the user before deploying. diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index 16dec79..0bf4682 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -25,7 +25,7 @@ For specific products, use dedicated skills: - **If the CLI is missing or outdated (< v0.292.0): STOP. Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. Present the install command to the user and ask them to run it in their own terminal. - - **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. 
See the `databricks-lakebase` connectivity guide for REST API patterns. + - **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the [Databricks REST API docs](https://docs.databricks.com/api/workspace/introduction). 2. **Authenticated**: `databricks auth profiles` - If not: see [CLI Authentication](databricks-cli-auth.md) diff --git a/skills/databricks-lakebase/references/connectivity.md b/skills/databricks-lakebase/references/connectivity.md index 8ec356d..2fbdc91 100644 --- a/skills/databricks-lakebase/references/connectivity.md +++ b/skills/databricks-lakebase/references/connectivity.md @@ -133,33 +133,16 @@ For production apps, combine with Pattern 2's token refresh loop and SQLAlchemy ### Pattern 5: Off-Platform Apps (TypeScript/Node.js) -For apps running outside Databricks (external servers, local dev, CI/CD) that connect to Lakebase. Use the `@databricks/lakebase` package — it works standalone without AppKit. - -```bash -npm install @databricks/lakebase -``` +For apps running outside Databricks (external servers, local dev, CI/CD), use the `@databricks/lakebase` package — it works standalone without AppKit and handles OAuth token refresh, SSL, and connection pooling automatically. 
```typescript import { createLakebasePool } from "@databricks/lakebase"; -const pool = createLakebasePool({ - host: process.env.PGHOST, - database: process.env.PGDATABASE, - endpoint: process.env.LAKEBASE_ENDPOINT, - // Authentication: reads from DATABRICKS_HOST + DATABRICKS_TOKEN, - // .databrickscfg, or explicit workspaceClient -}); - -// Returns a standard pg.Pool — works with Drizzle, Prisma, or any PostgreSQL library +const pool = createLakebasePool({ host, database, endpoint }); const { rows } = await pool.query("SELECT * FROM my_table LIMIT 10"); ``` -**What `@databricks/lakebase` handles automatically:** -- **OAuth token refresh** — tokens expire after 1 hour; the package refreshes 2 minutes before expiry with request deduplication -- **SSL** — defaults to `sslmode=require` -- **Connection pooling** — configurable `max`, `idleTimeoutMillis`, `connectionTimeoutMillis` - -**Authentication chain** (in order): explicit `workspaceClient` → Databricks SDK default auth (`DATABRICKS_HOST` + `DATABRICKS_TOKEN`, `.databrickscfg`) → `currentUser.me()` API fallback. For native Postgres password auth (bypassing OAuth), pass `password` directly. +For full configuration, auth chain, and SSL details, run `npm view @databricks/lakebase readme`. 
## Best Practices From a77190423a88dec0835187dd041347adcb68a092 Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Thu, 7 May 2026 18:23:58 +0200 Subject: [PATCH 7/9] Clean up deployment section, fix Lakehouse Sync, simplify CLI exception - Remove Verify Deployment + Deployment Recovery subsections (apps deploy already reports status; it's Option A, not a fallback) - Remove duplicate file size error and incorrect apps logs PAT entry - Fix token passthrough error: point to workspace admin enablement, not stripping OBO scopes - Fix Lakehouse Sync: Azure now supported, add Postgres 17 requirement, destination naming, permissions, partitioned table limitation - Simplify CLI exception: remove "does NOT require deploying" condition Co-authored-by: Isaac --- .../references/platform-guide.md | 35 +------------------ skills/databricks-core/SKILL.md | 2 +- .../references/synced-tables.md | 18 +++++++--- 3 files changed, 15 insertions(+), 40 deletions(-) diff --git a/skills/databricks-apps/references/platform-guide.md b/skills/databricks-apps/references/platform-guide.md index 8b46658..446ea3b 100644 --- a/skills/databricks-apps/references/platform-guide.md +++ b/skills/databricks-apps/references/platform-guide.md @@ -117,37 +117,6 @@ databricks bundle run -t --profile ❌ **Common mistake:** Running only `bundle deploy` and expecting the app to update. Deploy uploads code but does NOT apply config changes or restart the app. Use `databricks apps deploy` or add `bundle run` after `bundle deploy`. 
-### Verify Deployment - -After deploying, confirm the app is running: - -```bash -databricks apps get --profile -``` - -Check these fields in the response: -- `app_status`: should be `RUNNING` (may show `STARTING` immediately after deploy — poll until `RUNNING`) -- `compute_status`: should be `ACTIVE` -- `active_deployment.status.state`: should be `SUCCEEDED` - -For real-time log monitoring: `databricks apps logs --follow --profile ` - -### Deployment Recovery - -If `databricks bundle deploy` fails on terraform or infrastructure errors, `databricks apps deploy` is a direct fallback — it uploads source code and applies `app.yaml` without the bundle/terraform layer: - -```bash -# Fallback when bundle deploy fails -databricks apps deploy --profile -``` - -If deployment fails with `File is larger than 10485760 bytes` pointing to files inside `.databricks/bundle/` (terraform binary cached by the bundle), delete the `.databricks/` directory and retry: - -```bash -rm -rf .databricks/ -databricks apps deploy --profile -``` - ### ⚠️ Destructive Updates Warning `databricks apps update` (and `bundle run`) performs a **full replacement**, not a merge: @@ -201,6 +170,4 @@ For long-running agent interactions, use **WebSockets** instead of SSE. 
| OBO scopes missing after deploy | Destructive update wiped them | Re-apply scopes after each deploy | | `${var.xxx}` appears literally in env | Variables not resolved in config | Use literal values, not bundle variables | | 504 Gateway Timeout | Request exceeded 120s | Use WebSockets for long operations | -| `user token passthrough not enabled` | `user_api_scopes` in `databricks.yml` requires OBO auth, which is not enabled in the workspace | Remove the `user_api_scopes` block if the app doesn't need user-specific data — app runs under SP permissions instead | -| `File is larger than 10485760 bytes` in `.databricks/` | Terraform binary cached by bundle exceeds workspace file upload limit | Delete `.databricks/` directory and use `databricks apps deploy` instead of `bundle deploy` | -| `databricks apps logs` auth error | PAT authentication is incompatible with `databricks apps logs` | Use OAuth authentication: run `databricks auth login` instead of PAT-based profile | +| `user token passthrough not enabled` | `user_api_scopes` in `databricks.yml` requires user authorization, which is not enabled in the workspace | Ask workspace admin to enable user authorization (Public Preview). See [Databricks Apps auth docs](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth#user-authorization) | diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index 0bf4682..544f29a 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -25,7 +25,7 @@ For specific products, use dedicated skills: - **If the CLI is missing or outdated (< v0.292.0): STOP. Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. 
Present the install command to the user and ask them to run it in their own terminal. - - **Exception for off-platform tasks:** If CLI installation is blocked (sandboxed containers, restricted environments) and the task does NOT require deploying to Databricks, fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the [Databricks REST API docs](https://docs.databricks.com/api/workspace/introduction). + - **Exception:** If CLI installation is blocked (sandboxed containers, restricted environments), fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the [Databricks REST API docs](https://docs.databricks.com/api/workspace/introduction). 2. **Authenticated**: `databricks auth profiles` - If not: see [CLI Authentication](databricks-cli-auth.md) diff --git a/skills/databricks-lakebase/references/synced-tables.md b/skills/databricks-lakebase/references/synced-tables.md index 6822b87..30b1a46 100644 --- a/skills/databricks-lakebase/references/synced-tables.md +++ b/skills/databricks-lakebase/references/synced-tables.md @@ -186,24 +186,32 @@ If a Databricks App reads synced tables, the app's Service Principal needs expli - **Naming:** Database, schema, and table names allow `[A-Za-z0-9_]+` only - **Schema evolution:** Only additive changes (adding columns) for Triggered/Continuous modes -## Lakehouse Sync (Beta, AWS only) +## Lakehouse Sync (Beta) -Reverse direction: continuously streams changes **from** Lakebase Postgres **into** Unity Catalog Delta tables using CDC (SCD Type 2 history). Enables analytics and downstream pipelines on OLTP-written data. Does not require external compute, pipelines, or jobs — it is a native Lakebase feature. Azure support not yet available. +Reverse direction: continuously streams changes **from** Lakebase Postgres **into** Unity Catalog Delta tables using CDC (SCD Type 2 history). 
Destination tables are named `lb_<table_name>_history`. Does not require external compute, pipelines, or jobs — it is a native Lakebase feature. Available on AWS and Azure. -**Lakehouse Sync enablement is a UI-only action** — configured via the "Lakehouse sync" tab in the branch overview, not via CLI or API. It operates at the **schema level**: once enabled, all current and future tables in that schema sync to Unity Catalog as Delta tables. When automating CDC workflows, treat this as a manual post-automation step and inform the user. +**Lakehouse Sync enablement is a UI-only action** — configured via the "Lakehouse sync" tab in the branch overview, not via CLI or API. It operates at the **schema level**: once enabled, all current and future tables in that schema sync to Unity Catalog. When automating CDC workflows, treat this as a manual post-automation step and inform the user. **Prerequisites:** -- `REPLICA IDENTITY FULL` must be set on all source Postgres tables before enabling sync. This requires table ownership. +- Lakebase Autoscaling project running **Postgres 17** +- Tables must reside in the `databricks_postgres` database +- `REPLICA IDENTITY FULL` must be set on all source tables before enabling sync: ```sql ALTER TABLE <schema>.<table>
REPLICA IDENTITY FULL; ``` -- Verify replica identity is set: +- Verify replica identity: ```sql SELECT n.nspname AS schema, c.relname AS table_name, CASE c.relreplident WHEN 'f' THEN 'full' WHEN 'd' THEN 'default' WHEN 'n' THEN 'nothing' END AS replica_identity FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relkind = 'r' AND n.nspname = 'public'; ``` +- **Permissions:** CAN MANAGE on source project; USE CATALOG + USE SCHEMA + CREATE TABLE on destination +- Catalogs with default storage are **unsupported** + +**Limitations:** +- Partitioned tables are not supported +- Disabling and re-enabling sync does **not** re-snapshot — missing changes are lost permanently ## Use Cases From 25a9c580b9622979ae46a72c2b28860cfe88f687 Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Mon, 11 May 2026 12:23:22 +0200 Subject: [PATCH 8/9] Address review feedback: stale refs, Genie scope, REST fallback, Lakehouse Sync prereq - Update stale createLakebasePool references in overview.md and trpc.md - Broaden Genie docs pointer to not be limited to multi-space apps - REST fallback now asks user instead of auto-falling back - Surface databricks_postgres requirement earlier in Lakehouse Sync section - Regenerate manifest Co-authored-by: Isaac --- manifest.json | 16 ++++++++-------- .../databricks-apps/references/appkit/genie.md | 2 +- .../references/appkit/overview.md | 2 +- skills/databricks-apps/references/appkit/trpc.md | 2 +- skills/databricks-core/SKILL.md | 2 +- .../references/synced-tables.md | 2 ++ 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/manifest.json b/manifest.json index c6f1896..386c9fe 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,12 @@ { "version": "2", - "updated_at": "2026-05-07T14:09:14Z", + "updated_at": "2026-05-11T10:23:09Z", "skills": { "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps development and deployment (evaluates analytics vs synced tables data access)", "experimental": false, - 
"updated_at": "2026-05-07T14:08:13Z", + "updated_at": "2026-05-11T10:22:57Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -33,7 +33,7 @@ "version": "0.1.0", "description": "Core Databricks skill for CLI, auth, and data exploration", "experimental": false, - "updated_at": "2026-05-07T14:08:19Z", + "updated_at": "2026-05-11T10:22:59Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -66,7 +66,7 @@ "version": "0.1.0", "description": "Databricks Jobs orchestration and scheduling", "experimental": false, - "updated_at": "2026-05-07T13:50:17Z", + "updated_at": "2026-05-07T15:19:50Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -78,7 +78,7 @@ "version": "0.1.0", "description": "Databricks Lakebase Postgres: projects, scaling, connectivity, synced tables, and Data API", "experimental": false, - "updated_at": "2026-05-07T14:09:06Z", + "updated_at": "2026-05-11T10:23:05Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -93,7 +93,7 @@ "version": "0.1.0", "description": "Databricks Model Serving endpoint management", "experimental": false, - "updated_at": "2026-05-05T15:31:42Z", + "updated_at": "2026-05-07T15:19:45Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -105,7 +105,7 @@ "version": "0.1.0", "description": "Databricks Pipelines (DLT) for ETL and streaming", "experimental": false, - "updated_at": "2026-05-07T13:50:27Z", + "updated_at": "2026-05-07T15:19:55Z", "files": [ "SKILL.md", "agents/openai.yaml", @@ -152,7 +152,7 @@ "version": "0.1.0", "description": "Migrate Databricks workloads from classic compute to serverless compute, including compatibility checks and concrete fixes", "experimental": false, - "updated_at": "2026-05-05T15:31:42Z", + "updated_at": "2026-05-07T15:19:59Z", "files": [ "SKILL.md", "agents/openai.yaml", diff --git a/skills/databricks-apps/references/appkit/genie.md b/skills/databricks-apps/references/appkit/genie.md index 7650090..8320e35 100644 --- a/skills/databricks-apps/references/appkit/genie.md +++ 
b/skills/databricks-apps/references/appkit/genie.md @@ -145,7 +145,7 @@ function GeniePage() { Update smoke tests if headings or routes changed, then `databricks apps validate`. -For multi-space apps (switching between Genie spaces), see `npx @databricks/appkit docs ./docs/plugins/genie.md`. +For advanced Genie plugin usage, see `npx @databricks/appkit docs ./docs/plugins/genie.md`. ## Frontend diff --git a/skills/databricks-apps/references/appkit/overview.md b/skills/databricks-apps/references/appkit/overview.md index a0874e3..cec8587 100644 --- a/skills/databricks-apps/references/appkit/overview.md +++ b/skills/databricks-apps/references/appkit/overview.md @@ -126,7 +126,7 @@ Do not guess paths — run without args first, then pick from the index. | Use `useAnalyticsQuery` | [AppKit SDK](appkit-sdk.md) — memoization, conditional queries | | Add chart/table components | [Frontend](frontend.md) — component quick reference, anti-patterns | | Add API mutation endpoints | [tRPC](trpc.md) — only if you need server-side logic | -| Use Lakebase for CRUD / persistent state | [Lakebase](lakebase.md) — createLakebasePool, tRPC patterns, schema init | +| Use Lakebase for CRUD / persistent state | [Lakebase](lakebase.md) — Lakebase plugin API, tRPC patterns, schema init | | Add Genie chat | [Genie](genie.md) — space creation, plugin setup, frontend components | | Call ML model serving endpoints | [Model Serving](model-serving.md) — resource declaration, tRPC query pattern | | Trigger / monitor Lakeflow Jobs from the app | [Jobs](jobs.md) — env discovery, JobHandle API, SSE streaming | diff --git a/skills/databricks-apps/references/appkit/trpc.md b/skills/databricks-apps/references/appkit/trpc.md index 790c040..a51e578 100644 --- a/skills/databricks-apps/references/appkit/trpc.md +++ b/skills/databricks-apps/references/appkit/trpc.md @@ -43,7 +43,7 @@ databricks apps manifest --profile **Key plugins to check for:** - **analytics** — provides SQL warehouse query execution (do NOT 
reimplement with tRPC) -- **lakebase** — provides `createLakebasePool` for PostgreSQL CRUD (use pool in tRPC routes, don't create raw connections) +- **lakebase** — provides Lakebase plugin for PostgreSQL CRUD (use plugin in tRPC routes, don't create raw connections) - **genie** — provides Genie AI-powered data exploration (check before building custom natural-language-to-SQL routes) - **files** — provides file storage and retrieval helpers (check before writing custom file upload/download routes) diff --git a/skills/databricks-core/SKILL.md b/skills/databricks-core/SKILL.md index 544f29a..185fe96 100644 --- a/skills/databricks-core/SKILL.md +++ b/skills/databricks-core/SKILL.md @@ -25,7 +25,7 @@ For specific products, use dedicated skills: - **If the CLI is missing or outdated (< v0.292.0): STOP. Do not proceed or work around a missing CLI.** - **Read the [CLI Installation](databricks-cli-install.md) reference file and follow the instructions to guide the user through installation.** - Note: In sandboxed environments (Cursor IDE, containers), install commands write outside the workspace and may be blocked. Present the install command to the user and ask them to run it in their own terminal. - - **Exception:** If CLI installation is blocked (sandboxed containers, restricted environments), fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the [Databricks REST API docs](https://docs.databricks.com/api/workspace/introduction). + - **Exception:** If CLI installation is blocked (sandboxed containers, restricted environments), ask the user whether to fall back to direct REST API calls using `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables if present in the shell. See the [Databricks REST API docs](https://docs.databricks.com/api/workspace/introduction). 2. 
**Authenticated**: `databricks auth profiles` - If not: see [CLI Authentication](databricks-cli-auth.md) diff --git a/skills/databricks-lakebase/references/synced-tables.md b/skills/databricks-lakebase/references/synced-tables.md index 30b1a46..9e74882 100644 --- a/skills/databricks-lakebase/references/synced-tables.md +++ b/skills/databricks-lakebase/references/synced-tables.md @@ -190,6 +190,8 @@ If a Databricks App reads synced tables, the app's Service Principal needs expli Reverse direction: continuously streams changes **from** Lakebase Postgres **into** Unity Catalog Delta tables using CDC (SCD Type 2 history). Destination tables are named `lb_<table_name>_history`. Does not require external compute, pipelines, or jobs — it is a native Lakebase feature. Available on AWS and Azure. +> **Important:** Tables must reside in the `databricks_postgres` database for Lakehouse Sync to work. + **Lakehouse Sync enablement is a UI-only action** — configured via the "Lakehouse sync" tab in the branch overview, not via CLI or API. It operates at the **schema level**: once enabled, all current and future tables in that schema sync to Unity Catalog. When automating CDC workflows, treat this as a manual post-automation step and inform the user. **Prerequisites:** From a970067032905f39fad59f13c603b154d1a161ac Mon Sep 17 00:00:00 2001 From: Pawel Kosiec Date: Mon, 11 May 2026 15:22:25 +0200 Subject: [PATCH 9/9] Fix Lakebase skill: replace hallucinated server.router/procedure with real Express pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AppKit.server.router() and AppKit.server.procedure do not exist in the AppKit server plugin. Confirmed by: - tsc --noEmit failure against AppKit 0.24.0 scaffold - AppKit source (ServerPlugin.exports() only exposes extend/getServer/getConfig) Replace with the correct pattern: server.extend() with Express routes, matching the scaffold-generated code.
Also: - Rename "tRPC CRUD Pattern" → "CRUD Routes Pattern" - Use .then() callback pattern (compatible with current AppKit 0.24.0) - Update all code examples to use lowercase appkit variable - Fix synced tables example route - Update Key Differences table Co-authored-by: Isaac --- manifest.json | 4 +- .../references/appkit/lakebase.md | 124 ++++++++++-------- 2 files changed, 72 insertions(+), 56 deletions(-) diff --git a/manifest.json b/manifest.json index 386c9fe..bc7836a 100644 --- a/manifest.json +++ b/manifest.json @@ -1,12 +1,12 @@ { "version": "2", - "updated_at": "2026-05-11T10:23:09Z", + "updated_at": "2026-05-11T13:22:07Z", "skills": { "databricks-apps": { "version": "0.1.1", "description": "Databricks Apps development and deployment (evaluates analytics vs synced tables data access)", "experimental": false, - "updated_at": "2026-05-11T10:22:57Z", + "updated_at": "2026-05-11T13:22:01Z", "files": [ "SKILL.md", "agents/openai.yaml", diff --git a/skills/databricks-apps/references/appkit/lakebase.md b/skills/databricks-apps/references/appkit/lakebase.md index 92554e1..d178fb9 100644 --- a/skills/databricks-apps/references/appkit/lakebase.md +++ b/skills/databricks-apps/references/appkit/lakebase.md @@ -52,7 +52,7 @@ databricks postgres list-databases projects//branches/ -- ``` my-app/ ├── server/ -│ └── server.ts # Backend with Lakebase pool + tRPC routes +│ └── server.ts # Backend with Lakebase plugin + Express routes ├── client/ │ └── src/ │ └── App.tsx # React frontend @@ -60,21 +60,21 @@ my-app/ └── package.json # Includes @databricks/lakebase dependency ``` -Note: **No `config/queries/` directory** — Lakebase apps use server-side `pool.query()` calls, not SQL files. +Note: **No `config/queries/` directory** — Lakebase apps use server-side `appkit.lakebase.query()` calls, not SQL files. ## Lakebase Plugin API Scaffolding with `--features lakebase` (see above) generates this pattern. 
Access Lakebase through the plugin handle returned by `createApp()`: ```typescript -import { createApp, server, lakebase } from "@databricks/appkit"; +import { createApp, lakebase } from "@databricks/appkit"; -const AppKit = await createApp({ - plugins: [server(), lakebase()], +const appkit = await createApp({ + plugins: [lakebase()], }); // Query via the plugin handle — handles pooling and token refresh automatically -const result = await AppKit.lakebase.query("SELECT * FROM users WHERE id = $1", [userId]); +const result = await appkit.lakebase.query("SELECT * FROM users WHERE id = $1", [userId]); ``` The `lakebase()` plugin auto-configures from platform-injected env vars at deploy time. No manual pool setup needed. @@ -90,55 +90,70 @@ The `lakebase()` plugin auto-configures from platform-injected env vars at deplo | `PGSSLMODE` | SSL mode (`require`) | | `LAKEBASE_ENDPOINT` | Endpoint resource path | -## tRPC CRUD Pattern +## CRUD Routes Pattern -Always use tRPC for Lakebase operations — do NOT call `AppKit.lakebase.query()` from the client. +Always use server-side routes for Lakebase operations — do NOT call `appkit.lakebase.query()` from the client. 
Use `server.extend()` to register Express routes: ```typescript // server/server.ts import { createApp, server, lakebase } from "@databricks/appkit"; - -const AppKit = await createApp({ - plugins: [server(), lakebase()], -}); - -// Define routes using AppKit.lakebase.query() -AppKit.server.router({ - listItems: AppKit.server.procedure.query(async () => { - const { rows } = await AppKit.lakebase.query( - "SELECT * FROM app_data.items ORDER BY created_at DESC LIMIT 100" - ); - return rows; - }), - - createItem: AppKit.server.procedure - .input(z.object({ name: z.string().min(1) })) - .mutation(async ({ input }) => { - const { rows } = await AppKit.lakebase.query( - "INSERT INTO app_data.items (name) VALUES ($1) RETURNING *", - [input.name] +import { z } from 'zod'; + +createApp({ + plugins: [server({ autoStart: false }), lakebase()], +}) + .then(async (appkit) => { + // Schema init (runs once at startup) + await appkit.lakebase.query(` + CREATE SCHEMA IF NOT EXISTS app_data; + CREATE TABLE IF NOT EXISTS app_data.items ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW() ); - return rows[0]; - }), - - deleteItem: AppKit.server.procedure - .input(z.object({ id: z.number() })) - .mutation(async ({ input }) => { - await AppKit.lakebase.query("DELETE FROM app_data.items WHERE id = $1", [input.id]); - return { success: true }; - }), -}); + `); + + // CRUD routes via Express + appkit.server.extend((app) => { + app.get('/api/items', async (_req, res) => { + const { rows } = await appkit.lakebase.query( + "SELECT * FROM app_data.items ORDER BY created_at DESC LIMIT 100" + ); + res.json(rows); + }); + + app.post('/api/items', async (req, res) => { + const parsed = z.object({ name: z.string().min(1) }).safeParse(req.body); + if (!parsed.success) { res.status(400).json({ error: 'Invalid input' }); return; } + const { rows } = await appkit.lakebase.query( + "INSERT INTO app_data.items (name) VALUES ($1) RETURNING *", + [parsed.data.name] + ); + 
res.status(201).json(rows[0]); + }); + + app.delete('/api/items/:id', async (req, res) => { + const id = parseInt(req.params.id, 10); + if (isNaN(id)) { res.status(400).json({ error: 'Invalid id' }); return; } + await appkit.lakebase.query("DELETE FROM app_data.items WHERE id = $1", [id]); + res.status(204).send(); + }); + }); + + await appkit.server.start(); + }) + .catch(console.error); ``` > **Deploy first (App + Lakebase only)!** When your Databricks App uses Lakebase, the Service Principal must create and own the schema. Run `databricks apps deploy` before any local development. See **`databricks-lakebase`** skill's **Schema Permissions for Deployed Apps** for details. ## Schema Initialization -**Always create a custom schema** — the Service Principal cannot access any existing schemas (including `public`). It must create the schema itself to become its owner. See **`databricks-lakebase`** skill's **Schema Permissions for Deployed Apps** for the full permission model and deploy-first workflow. Initialize tables on server startup: +**Always create a custom schema** — the Service Principal cannot access any existing schemas (including `public`). It must create the schema itself to become its owner. See **`databricks-lakebase`** skill's **Schema Permissions for Deployed Apps** for the full permission model and deploy-first workflow. 
Initialize tables inside the `.then()` callback before registering routes (see CRUD pattern above): ```typescript -// server/server.ts — run once at startup before handling requests -await AppKit.lakebase.query(` +// Inside the .then() callback — runs once at startup before handling requests +await appkit.lakebase.query(` CREATE SCHEMA IF NOT EXISTS app_data; CREATE TABLE IF NOT EXISTS app_data.items ( id SERIAL PRIMARY KEY, @@ -150,20 +165,20 @@ await AppKit.lakebase.query(` ## ORM Integration (Optional) -The plugin exposes the raw `pg.Pool` via `AppKit.lakebase.pool` — works with any PostgreSQL library: +The plugin exposes the raw `pg.Pool` via `appkit.lakebase.pool` — works with any PostgreSQL library: ```typescript // Drizzle ORM import { drizzle } from "drizzle-orm/node-postgres"; -const db = drizzle(AppKit.lakebase.pool); +const db = drizzle(appkit.lakebase.pool); // Prisma (with @prisma/adapter-pg) import { PrismaPg } from "@prisma/adapter-pg"; -const adapter = new PrismaPg(AppKit.lakebase.pool); +const adapter = new PrismaPg(appkit.lakebase.pool); const prisma = new PrismaClient({ adapter }); ``` -For ORM-compatible config: `AppKit.lakebase.getOrmConfig()`. +For ORM-compatible config: `appkit.lakebase.getOrmConfig()`. ## Reading from Lakebase synced tables @@ -171,7 +186,7 @@ Lakebase synced tables materialize Delta/UC tables into Lakebase Postgres for lo **Architecture:** ``` -Delta gold tables → Synced tables (read-only) → App reads via AppKit.lakebase.query() +Delta gold tables → Synced tables (read-only) → App reads via appkit.lakebase.query() App writes → Lakebase OLTP tables → optional Lakehouse Sync → Delta ``` @@ -181,7 +196,7 @@ App writes → Lakebase OLTP tables → optional Lakehouse Sync ### How It Works -Synced tables (created via `databricks postgres create-synced-table`) appear as regular Postgres tables. From the app's perspective, use the same `AppKit.lakebase.query()` pattern but **read-only**. 
+Synced tables (created via `databricks postgres create-synced-table`) appear as regular Postgres tables. From the app's perspective, use the same `appkit.lakebase.query()` pattern but **read-only**. **Key differences from CRUD tables:** @@ -195,19 +210,20 @@ Synced tables (created via `databricks postgres create-synced-table`) appear as **Permission grant required:** The app's SP has `CAN_CONNECT_AND_CREATE` but does **not** have `pg_read_all_data`. To read synced tables, the project owner must grant access — see the **`databricks-lakebase`** skill's SKILL.md "Grant app SP access to synced tables" section for the SQL commands and psql connection steps. -**Example tRPC route reading synced taxi data:** +**Example Express route reading synced taxi data:** ```typescript -topPickups: publicProcedure.query(async () => { - const { rows } = await AppKit.lakebase.query(` +// Inside the .then() callback → appkit.server.extend((app) => { ... }) +app.get('/api/top-pickups', async (_req, res) => { + const { rows } = await appkit.lakebase.query(` SELECT pickup_zip, COUNT(*) AS trip_count, AVG(fare_amount) AS avg_fare FROM public.nyc_trips GROUP BY pickup_zip ORDER BY trip_count DESC LIMIT 10 `); - return rows; -}), + res.json(rows); +}); ``` > **Do not write to synced tables.** The sync pipeline manages the data — direct writes corrupt the sync state. For mixed read/write patterns, read from synced tables and write to separate app-owned tables. To create synced tables and grant the app's SP read access, see the **`databricks-lakebase`** skill's [synced-tables.md](../../../databricks-lakebase/references/synced-tables.md) and the "Grant app SP access to synced tables" section in its SKILL.md. 
@@ -217,8 +233,8 @@ topPickups: publicProcedure.query(async () => { | | Analytics | Lakebase | |--|-----------|---------| | SQL dialect | Databricks SQL (Spark SQL) | Standard PostgreSQL | -| Query location | `config/queries/*.sql` files | `pool.query()` in tRPC routes | -| Data retrieval | `useAnalyticsQuery` hook | tRPC query procedure | +| Query location | `config/queries/*.sql` files | `appkit.lakebase.query()` in Express routes | +| Data retrieval | `useAnalyticsQuery` hook | Express route via `server.extend()` | | Date functions | `CURRENT_TIMESTAMP()`, `DATEDIFF(DAY, ...)` | `NOW()`, `AGE(...)` | | Auto-increment | N/A | `SERIAL` or `GENERATED ALWAYS AS IDENTITY` | | Insert pattern | N/A | `INSERT ... VALUES ($1) RETURNING *` |