diff --git a/.agents/plugins/marketplace.json b/.agents/plugins/marketplace.json index d4b2e85..15154c6 100644 --- a/.agents/plugins/marketplace.json +++ b/.agents/plugins/marketplace.json @@ -15,7 +15,7 @@ { "name": "flow", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", - "version": "0.20.5", + "version": "0.21.0", "source": { "source": "local", "path": "./plugins/flow" }, "policy": { "installation": "AVAILABLE", diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 70bc3c5..c13f8bb 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ { "name": "flow", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", - "version": "0.20.5", + "version": "0.21.0", "source": "./", "author": { "name": "cofin" diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 8b9fd63..cefab54 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "flow", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", - "version": "0.20.5", + "version": "0.21.0", "author": { "name": "cofin" }, diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 322255a..66aabbf 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow", - "version": "0.20.5", + "version": "0.21.0", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", "author": { "name": "cofin" }, "homepage": "https://github.com/cofin/flow", diff --git a/.codex/hooks.json b/.codex/hooks.json index a0da530..551050c 100644 --- a/.codex/hooks.json +++ b/.codex/hooks.json @@ -5,10 +5,11 @@ "matcher": "*", "hooks": [ { + "name": "flow-env-detection", "type": 
"command", - "command": "bash ./hooks/session-start.sh", + "command": "export FLOW_HOST=codex; r=\"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}\"; bun \"$r/hooks/session-start.js\" || node \"$r/hooks/session-start.js\" || bash \"$r/hooks/session-start.sh\"", "timeout": 30, - "description": "Detects Beads backend and project root at session start (Codex CLI)." + "description": "Detects Beads backend and project root at session start (Codex CLI). Resolves the plugin root from $PLUGIN_ROOT (canonical) or $CLAUDE_PLUGIN_ROOT (alias), falling back to cwd for the in-repo project layer. FLOW_HOST=codex forces the Codex payload shape even when neither var is set." } ] } diff --git a/.opencode/agents/code-reviewer.md b/.opencode/agents/code-reviewer.md index c8c92b2..8806e5e 100644 --- a/.opencode/agents/code-reviewer.md +++ b/.opencode/agents/code-reviewer.md @@ -2,12 +2,10 @@ name: code-reviewer description: Review Flow specs, plans, and implementation changes for correctness, risk, and missing verification. mode: subagent -tools: - read: true - grep: true - glob: true - bash: true - webFetch: true +permission: + edit: deny + bash: allow + webfetch: allow --- Review Flow work for behavioral bugs, invalid host schemas, stale setup commands, missing tests, and missing verification evidence. Lead with findings ordered by severity. diff --git a/.opencode/agents/executor.md b/.opencode/agents/executor.md index 1faaab5..cb750a4 100644 --- a/.opencode/agents/executor.md +++ b/.opencode/agents/executor.md @@ -2,14 +2,10 @@ name: executor description: Execute Flow implementation tasks with TDD, Beads notes, verification, and sync discipline. mode: subagent -tools: - read: true - grep: true - glob: true - bash: true - edit: true - write: true - webFetch: true +permission: + edit: allow + bash: allow + webfetch: allow --- Execute the current Flow task with Beads notes, Red-Green-Refactor, and fresh verification before completion claims. 
diff --git a/.opencode/agents/plan-generator.md b/.opencode/agents/plan-generator.md index 2aa9f83..6eaabdc 100644 --- a/.opencode/agents/plan-generator.md +++ b/.opencode/agents/plan-generator.md @@ -2,14 +2,10 @@ name: plan-generator description: Generate zero-ambiguity Flow specs and implementation worksheets after codebase analysis. mode: subagent -tools: - read: true - grep: true - glob: true - bash: true - edit: true - write: true - webFetch: true +permission: + edit: allow + bash: allow + webfetch: allow --- Create implementation-ready Flow specs with exact file targets, task order, test commands, and acceptance checks. diff --git a/.opencode/agents/prd-orchestrator.md b/.opencode/agents/prd-orchestrator.md index a8adb29..4ab4ba0 100644 --- a/.opencode/agents/prd-orchestrator.md +++ b/.opencode/agents/prd-orchestrator.md @@ -2,14 +2,10 @@ name: prd-orchestrator description: Analyze broad goals and produce Flow PRD roadmaps with implementation-ready child flows. mode: subagent -tools: - read: true - grep: true - glob: true - bash: true - edit: true - write: true - webFetch: true +permission: + edit: allow + bash: allow + webfetch: allow --- Analyze broad Flow goals, complete research up front, and produce PRD roadmaps split into implementation-ready flows. diff --git a/AGENTS.md b/AGENTS.md index 3ced1a8..662af82 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,6 +2,8 @@ This file provides guidance to AI coding agents working with code in this repository. +> **Flow is a skill, not a CLI.** There is no `flow` executable. Never run `flow`, `flow sync`, `flow prd`, `flow status`, etc. as shell commands — they will fail. Invoke the Flow skill, or use the `/flow:*` slash commands (e.g. `/flow:sync`, `/flow:prd`). 
+ ## Overview **Flow** is a unified toolkit for **Context-Driven Development** combining: diff --git a/CLAUDE.md b/CLAUDE.md index b8f5eb7..1e5dbfc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,6 +2,8 @@ Use the Flow skill for context-driven development workflows in repos that use `.agents/`. +> **Flow is a skill, not a CLI.** There is no `flow` executable. Never run `flow`, `flow sync`, `flow prd`, `flow status`, etc. as shell commands — they will fail. Invoke the Flow skill, or use the `/flow:*` slash commands (e.g. `/flow:sync`, `/flow:prd`). + ## Defaults - Use official Beads (`bd`) when task persistence is needed. diff --git a/GEMINI.md b/GEMINI.md index c7ee3ca..97ad9b2 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -2,6 +2,8 @@ Use the Flow skill for context-driven development workflows in repos that use `.agents/`. +> **Flow is a skill, not a CLI.** There is no `flow` executable. Never run `flow`, `flow sync`, `flow prd`, `flow status`, etc. as shell commands — they will fail. Invoke the Flow skill, or use the `/flow:*` slash commands (e.g. `/flow:sync`, `/flow:prd`). + ## Defaults - Prefer official Beads (`bd`) when task persistence is needed. diff --git a/commands/flow-plan.md b/commands/flow-plan.md index b96db9a..5b0b4a2 100644 --- a/commands/flow-plan.md +++ b/commands/flow-plan.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, Task, AskUserQuestion # Flow Plan > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. See `flow-planning` → "Interrogate Before Finalizing". 
## The Planner Mandate diff --git a/commands/flow-prd.md b/commands/flow-prd.md index 346c404..c90b393 100644 --- a/commands/flow-prd.md +++ b/commands/flow-prd.md @@ -8,6 +8,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, Task, AskUserQuestion > **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. > > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. Do not finish the roadmap while obvious research gaps remain. See `flow-planning` → "Interrogate Before Finalizing". ## The Orchestrator Mandate diff --git a/commands/flow-refine.md b/commands/flow-refine.md index 3fdb330..c9a9bf0 100644 --- a/commands/flow-refine.md +++ b/commands/flow-refine.md @@ -9,6 +9,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, WebSearch > **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. > > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. Refinement is done only when a zero-context executor could implement from the worksheet alone. 
See `flow-planning` → "Interrogate Before Finalizing". Refining flow: **$ARGUMENTS** diff --git a/commands/flow-revert.md b/commands/flow-revert.md index 9d32764..d98b2f9 100644 --- a/commands/flow-revert.md +++ b/commands/flow-revert.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash # Flow Revert +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. +> > Lifecycle skill: use `flow-completion` through the `flow` router. Reverting: **$ARGUMENTS** diff --git a/commands/flow-sync.md b/commands/flow-sync.md index f32f563..301b53a 100644 --- a/commands/flow-sync.md +++ b/commands/flow-sync.md @@ -14,6 +14,12 @@ Syncing active backend state to disk for flow: **$ARGUMENTS** **CRITICAL:** `/flow:sync` is the primary bridge between the **Beads Source of Truth** and the **Markdown View**. Default setup runs it after task completion, note addition, or status changes when `syncPolicy.flowSyncAfterMutation` is enabled. +**What sync means here (and what it does NOT):** + +- `/flow:sync` **ALWAYS writes the reconciled markdown to disk** — updating **every markdown file in `.agents/specs/<flow_id>/`** (`spec.md`, `learnings.md`, and any other tracked markdown in the flow folder), not just `spec.md`, so they all match Beads exactly. This write is **mandatory**; sync is never read-only/dry-run and must never finish without persisting the markdown. +- "Sync" / "export" in Flow means **making the markdown files and Beads reflect identical reality on disk** — nothing more. +- It does **NOT** mean Dolt. **NEVER run `bd dolt` commands** (`bd dolt push`/`pull`/`export`) as part of sync, regardless of phrasing. Those are out of scope for `/flow:sync` and only run if the user explicitly and separately asks for Dolt operations.
+ --- ## Phase 0: Environment Detection diff --git a/commands/flow-validate.md b/commands/flow-validate.md index da1c9c2..1f2dff3 100644 --- a/commands/flow-validate.md +++ b/commands/flow-validate.md @@ -5,6 +5,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash # Flow Validate +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. +> > Lifecycle skill: use `flow-setup` through the `flow` router. Validate Flow project integrity and optionally fix issues. diff --git a/commands/flow/revert.toml b/commands/flow/revert.toml index 1022283..68a6aa7 100644 --- a/commands/flow/revert.toml +++ b/commands/flow/revert.toml @@ -3,6 +3,9 @@ prompt = """ ## Lifecycle Skill Use the `flow-completion` lifecycle skill through the `flow` router before carrying out this command. +## Beads Mode +Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. + ## 1.0 SYSTEM DIRECTIVE You are a **Git-aware revert assistant** for the Flow framework with Beads integration. diff --git a/commands/flow/revise.toml b/commands/flow/revise.toml index cfb46a3..b6e2a93 100644 --- a/commands/flow/revise.toml +++ b/commands/flow/revise.toml @@ -3,6 +3,9 @@ prompt = """ ## Lifecycle Skill Use the `flow-planning` lifecycle skill through the `flow` router before carrying out this command. +## Beads Mode +Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. 
Never halt for missing Beads. + ## SYSTEM DIRECTIVE You are revising a flow's spec or plan because implementation has revealed new information. All revisions must be documented. diff --git a/commands/flow/sync.toml b/commands/flow/sync.toml index ecd94d4..2666dad 100644 --- a/commands/flow/sync.toml +++ b/commands/flow/sync.toml @@ -9,6 +9,8 @@ Skip every `bd` invocation below when the SessionStart hook reports `Beads Backe ## 1.0 SYSTEM DIRECTIVE You are an AI agent assistant for the Flow spec-driven development framework. Your task is to synchronize the current active backend task state back to the on-disk `spec.md` for a flow, refresh context documents based on the codebase, and optionally generate an export summary. +MANDATORY: `/flow:sync` ALWAYS writes the reconciled markdown to disk so **every markdown file in `.agents/specs/<flow_id>/`** (`spec.md`, `learnings.md`, and any other tracked markdown in the flow folder) — not just `spec.md` — matches Beads exactly. The write is mandatory — never finish sync read-only/dry-run. "Sync"/"export" here means reconciling markdown ↔ Beads to identical reality on disk; it does NOT mean Dolt. NEVER run `bd dolt` commands (`push`/`pull`/`export`) as part of sync unless the user explicitly and separately asks for Dolt. + CRITICAL: You must validate the success of every tool call. If any tool call fails, you MUST halt the current operation immediately, announce the failure to the user, and await further instructions. --- diff --git a/commands/flow/task.toml b/commands/flow/task.toml index ac045b5..4e62c2f 100644 --- a/commands/flow/task.toml +++ b/commands/flow/task.toml @@ -3,6 +3,9 @@ prompt = """ ## Lifecycle Skill Use the `flow-planning` lifecycle skill through the `flow` router before carrying out this command. +## Beads Mode +Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`.
Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. + ## 1.0 SYSTEM DIRECTIVE You are an AI agent assistant for the Flow framework. Your goal is to create a "Task" - a lightweight, ephemeral track for quick tasks, explorations, or experiments that don't need a full PRD. diff --git a/docs/antigravity.md b/docs/antigravity.md new file mode 100644 index 0000000..da34844 --- /dev/null +++ b/docs/antigravity.md @@ -0,0 +1,57 @@ +# Installing Flow for Antigravity CLI + +Google is transitioning **Gemini CLI** into **Antigravity CLI**. Consumer Gemini CLI +tiers (free, AI Pro, AI Ultra) stop serving requests on **June 18, 2026**; enterprise +Gemini Code Assist Standard/Enterprise tiers retain full Gemini CLI support. In +Antigravity, "extensions" are renamed **plugins**, and Agent Skills, Hooks, and +Subagents are preserved. + +Flow targets Antigravity by **reusing its Gemini extension assets** — no separate +manifest is required as of June 2026. + +## What carries forward + +| Asset | File | Notes | +|---|---|---| +| Extension manifest | `gemini-extension.json` | Same schema; `${extensionPath}`/`${/}` tokens still valid | +| SessionStart hook | `hooks/hooks.json` | Auto-discovered; `bun \|\| node \|\| bash` ladder | +| Subagents | `agents/*.md` | Markdown + frontmatter | +| Commands | `commands/flow/*.toml` | TOML slash commands | +| Context | `GEMINI.md` / `AGENTS.md` | Antigravity reads both | +| Skills | `skills/**/SKILL.md` | `.agents/skills/` is recognized as an alias | + +The config hub remains `~/.gemini` (conversations, MCP config, plugins, approved +project folders, shared skills). 
+ +## Install + +While Antigravity's marketplace/install flow stabilizes, install Flow the same way as +the Gemini extension (Antigravity reads the same hub): + +```bash +# Gemini CLI (still works on enterprise tiers and pre-cutover) +gemini extensions install https://github.com/cofin/flow --auto-update +``` + +For Antigravity-native installation, follow Google's transition guide once published +(`antigravity.google/docs`). The Flow assets above require no changes to register as +an Antigravity plugin. + +## Verify at release + +The Antigravity plugin manifest filename/location is being finalized by Google +("docs coming weeks" as of the transition announcement). At each Flow release: + +1. Check `antigravity.google/docs` and the + [Gemini → Antigravity CLI transition post](https://developers.googleblog.com/an-important-update-transitioning-gemini-cli-to-antigravity-cli/) + for a required manifest rename or new location. +2. If a new manifest file is introduced, add it to the `[tool.bumpversion]` file list in + `pyproject.toml` so its version stays in sync, and add it to the conformance matrix. +3. Confirm `${extensionPath}`/`${/}` hook tokens are still honored (they were as of June 2026). + +## Hook token note + +Antigravity uses the **Gemini** hook tokens (`${extensionPath}`, `${/}`) — NOT the +Codex `${PLUGIN_ROOT}` form. Flow keeps these separate: `hooks/hooks.json` (Gemini / +Antigravity) and `hooks/hooks-codex.json` (Codex). See +[host-conformance-matrix.md](./host-conformance-matrix.md). diff --git a/docs/host-conformance-matrix.md b/docs/host-conformance-matrix.md new file mode 100644 index 0000000..c78cdf9 --- /dev/null +++ b/docs/host-conformance-matrix.md @@ -0,0 +1,75 @@ +# Flow Multi-Host Conformance Matrix (June 2026) + +Authoritative per-host contract for what Flow ships and how each host consumes it, +verified against the June 2026 state of each platform. 
When a host's behavior or a +token changes, update this table **and** the validator/test that guards it so the +two never drift. This complements [multi-host-plugin-patterns.md](./multi-host-plugin-patterns.md) +(the how-to) with a tight conformance contract (the what-must-be-true). + +## Contract table + +| Host | Install root / manifest | Marketplace | Hook manifest | Hook token | Hook event | Agent file / format | Commands | +|---|---|---|---|---|---|---|---| +| **Claude Code** | `.claude-plugin/plugin.json` (components auto-discover at plugin root) | `.claude-plugin/marketplace.json` | `hooks/hooks-claude.json` (referenced from `plugin.json` `hooks`) | `${CLAUDE_PLUGIN_ROOT}` | `SessionStart` | `agents/*.md` (no `hooks`/`mcpServers`/`permissionMode` in plugin agents) | `commands/*.md` | +| **OpenAI Codex** | `.codex-plugin/plugin.json` (package: `plugins/flow/`) | `.agents/plugins/marketplace.json` (canonical); `.claude-plugin/marketplace.json` legacy | project: `.codex/hooks.json`; installed plugin: auto-discovered `hooks/hooks.json` (emitted from `hooks/hooks-codex.json`) | `${PLUGIN_ROOT}` canonical, `${CLAUDE_PLUGIN_ROOT}` alias, `.` fallback | `SessionStart` | `.codex/agents/*.toml` — requires `name`+`description`+`developer_instructions`; no per-agent `tools` (inherits session) | `commands/flow-*.md` | +| **Gemini CLI** | `gemini-extension.json` | `gemini extensions install ` | `hooks/hooks.json` (auto-discovered) | `${extensionPath}` + `${/}` | `SessionStart` | `agents/*.md` (`.gemini/agents/*.md` for user scope) | `commands/flow/*.toml` | +| **Antigravity CLI** | `gemini-extension.json` (carried forward; "extensions" → "plugins") | install hub `~/.gemini/config/plugins/` | `hooks/hooks.json` (reuses Gemini manifest) | `${extensionPath}` + `${/}` | `SessionStart` | reuses Gemini `agents/*.md`; reads `AGENTS.md`/`GEMINI.md` + `.agents/skills/` | reuses `commands/flow/*.toml` | +| **opencode** | `.opencode/plugins/flow.js` (`@opencode-ai/plugin`) | local / 
git (`opencode.json` `plugin`) | no SessionStart hook → `experimental.chat.system.transform` + `shell.env` | `FLOW_PLUGIN_ROOT` (set by plugin) | n/a (system-prompt injection) | `.opencode/agents/*.md` — `permission:` object (`allow`/`ask`/`deny`); `steps` not `maxSteps` | reuses skills/commands | +| **Cursor** | `.cursor/rules/flow.mdc` + `AGENTS.md` | n/a (no stable repo plugin API) | `hooks/hooks-cursor.json` (`sessionStart`, cwd-relative) — see constraint | cwd-relative `./hooks/session-start.sh` | `sessionStart` (camelCase) | n/a (rules-based) | n/a | +| **GitHub Copilot** | `.github/agents/*.agent.md` | n/a | n/a | n/a | n/a | `.agent.md` — `description` required; no retired `infer` | n/a | + +## Invariants (enforced by validators/tests) + +- **Codex hook commands** (`tools/validate-codex-manifest.py::validate_codex_hook_commands`): every Codex-consumed manifest (`.codex/hooks.json`, `hooks/hooks-codex.json`, and the two package copies) must contain **no** Gemini tokens (`${extensionPath}`/`${/}`) and must anchor to `$PLUGIN_ROOT`/`$CLAUDE_PLUGIN_ROOT`. Guards flow-9qx / GH #64. +- **Gemini hook manifest** (`tests/test_gemini_hooks.py`): top-level `hooks/hooks.json` keeps `${extensionPath}`/`${/}` and never embeds `${CLAUDE_PLUGIN_ROOT}`. +- **Claude hook manifest**: `hooks/hooks-claude.json` uses `${CLAUDE_PLUGIN_ROOT}` and never Gemini tokens; `plugin.json` points its `hooks` at it. +- **Package freshness** (`make codex-package-check`): `plugins/flow/` is regenerated from source; its auto-discovered `hooks/hooks.json` is the Codex-native manifest (Gemini installs from the repo root, not the package). +- **Version sync** (`tools/sync-manifests.py`): all bumpversion-tracked manifests carry one version. + +## How Codex resolves the plugin root (the flow-9qx fix) + +Codex runs `SessionStart` command hooks **through a shell with the session cwd** +(the user's project), not the plugin root. 
A bare `./hooks/session-start.sh` therefore +does not resolve once installed. The command anchors with a defensive expansion: + +```bash +bun "${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}/hooks/session-start.js" || node "…/session-start.js" || bash "…/session-start.sh" +``` + +- `${PLUGIN_ROOT}` — canonical Codex var (installed plugin). +- `${CLAUDE_PLUGIN_ROOT}` — Codex compat alias. +- `.` — cwd fallback for the in-repo project layer (where Codex sets neither var and cwd *is* the repo). + +`hooks/session-start.{sh,ps1}` host detection checks `CODEX_PLUGIN_ROOT`/`PLUGIN_ROOT` +**before** the Claude branch (Codex exports the alias too, so order matters). + +## Antigravity CLI status + +Gemini CLI consumer tiers stop serving **June 18, 2026**; the product becomes +**Antigravity CLI**, where "extensions" are renamed **plugins** (Skills/Hooks/Subagents +preserved). The extension format carries forward: Flow's existing `gemini-extension.json`, +`hooks/hooks.json` (`${extensionPath}`/`${/}`), `agents/*.md`, and `commands/flow/*.toml` +are the Antigravity plugin assets — `~/.gemini` remains the config hub and `.agents/skills/` +is recognized. Enterprise Gemini Code Assist tiers retain full Gemini CLI support. + +**Action at release:** verify the Antigravity plugin manifest filename/location against +live docs (`antigravity.google/docs`, the Gemini→Antigravity transition blog). If a new +manifest name is required, add it to the bumpversion file list so it stays version-synced. +See [antigravity.md](./antigravity.md) for the install/migration guide. + +## Documented constraints / deferred hardening + +- **Cursor command path** is cwd-relative (`./hooks/session-start.sh`). Cursor's repo + plugin API and whether it expands `${VAR}` in hook commands are not stably documented; + changing the form risks breaking a working integration. Left cwd-relative until Cursor + documents a plugin-root token. `session-start.sh` already detects `CURSOR_PLUGIN_ROOT`. 
+- **Claude exec-form hooks** (`args`) are available in June 2026 but **not adopted**: the + Codex/Gemini commands require shell form for the `bun||node||bash` ladder, the Claude + command is a fixed path with no user input (marginal injection benefit), and exec-form + token substitution in `args` is unverified in this environment. Shell form is kept + consistently across hosts. Revisit if Claude deprecates shell-form hooks. +- **opencode `@opencode-ai/plugin`** is pinned at `1.4.6` in `.opencode/package-lock.json` + (latest is `1.16.2`). The plugin uses documented hooks (`experimental.chat.system.transform`, + `shell.env`). Bumping requires regenerating the lockfile (network + a `package.json`); + re-verify the hook API against `1.16.2` and regenerate the lockfile as a follow-up rather + than hand-editing integrity hashes. diff --git a/docs/multi-host-plugin-patterns.md b/docs/multi-host-plugin-patterns.md index 3c75584..9740f51 100644 --- a/docs/multi-host-plugin-patterns.md +++ b/docs/multi-host-plugin-patterns.md @@ -1,6 +1,8 @@ -# Multi-Host Plugin Patterns (April 2026) +# Multi-Host Plugin Patterns (June 2026) -Reference for any repo that ships skills, commands, hooks, or agents across **Claude Code**, **Gemini CLI**, **Codex CLI**, **OpenCode**, and **Cursor**. Captures the exact manifest paths, schema keys, and adoption status verified against official docs in this Flow session. Paste into your own project's `docs/` and trim to what applies. +Reference for any repo that ships skills, commands, hooks, or agents across **Claude Code**, **Gemini CLI** / **Antigravity CLI**, **Codex CLI**, **OpenCode**, and **Cursor**. Captures the exact manifest paths, schema keys, and adoption status verified against official docs. Paste into your own project's `docs/` and trim to what applies. + +> For the tight per-host conformance contract (tokens, events, invariants enforced by validators/tests), see [host-conformance-matrix.md](./host-conformance-matrix.md). 
For the Gemini → Antigravity transition (consumer cutover **June 18, 2026**), see [antigravity.md](./antigravity.md). ## TL;DR — what every multi-host repo should ship @@ -13,8 +15,10 @@ Reference for any repo that ships skills, commands, hooks, or agents across **Cl | `.codex/agents/*.toml` | Codex CLI | Repo-local TOML subagents that inherit session tools | | `gemini-extension.json` | Gemini CLI | Extension manifest — `plan.directory`, **`excludeTools`**, `contextFileName` | | `agents/*.md` | Gemini CLI / Claude Code | Shared Markdown subagents with slug names and descriptions | -| `hooks/hooks.json` | Gemini CLI | Auto-discovered hook manifest | +| `hooks/hooks.json` | Gemini CLI / Antigravity | Auto-discovered hook manifest (`${extensionPath}`/`${/}` tokens) | | `hooks/hooks-claude.json` | Claude Code | Per-host hook manifest (referenced from `.claude-plugin/plugin.json`) | +| `hooks/hooks-codex.json` | Codex CLI | Codex-native hook manifest (`${PLUGIN_ROOT}` token); package build overwrites the package's `hooks/hooks.json` with it | +| `.codex/hooks.json` | Codex CLI | Project-layer hook manifest (same `${PLUGIN_ROOT}` command) | | `.cursor/rules/*.mdc` | Cursor | Workspace rules consumed by Cursor's supported customization surface | | `.github/agents/*.agent.md` | VS Code / Copilot | Workspace custom agents | | `.opencode/plugins/.js` | OpenCode | Local plugin entrypoint with managed-config awareness | @@ -129,8 +133,35 @@ In a Codex session, `/plugins` enables/disables installed plugins. **`storefront` interface block**: claimed in some sources but unverified in current Codex docs. Skip until you can read the schema directly from the openai/codex repo. +**Codex hooks** (`.codex/hooks.json` for the project layer; `hooks/hooks.json` auto-discovered for an installed plugin): Codex runs `SessionStart` command hooks **through a shell with the session cwd** (the user's project), NOT the plugin root. A bare `./hooks/...` path therefore breaks once installed. 
Anchor to the plugin root with a defensive expansion: + +```json +{ + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "name": "", + "type": "command", + "command": "bun \"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}/hooks/session-start.js\" || node \"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}/hooks/session-start.js\" || bash \"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}/hooks/session-start.sh\"", + "timeout": 30, + "description": "" + } + ] + } + ] + } +} +``` + +`${PLUGIN_ROOT}` is the canonical Codex var; `${CLAUDE_PLUGIN_ROOT}` is its compat alias; `.` is the cwd fallback for the in-repo project layer. Do **NOT** use Gemini's `${extensionPath}`/`${/}` tokens here — bash cannot expand them and Codex errors with `bad substitution` (this was GH #64). Because Gemini and Codex both auto-discover `hooks/hooks.json`, keep two source files — `hooks/hooks.json` (Gemini) and `hooks/hooks-codex.json` (Codex) — and have the package build overwrite the **package** copy of `hooks/hooks.json` with the Codex manifest (Gemini installs from the repo root, Codex from the package). + ### Gemini CLI +> **Sunset:** consumer Gemini CLI tiers stop serving requests **June 18, 2026** and become **Antigravity CLI** ("extensions" → "plugins"; Skills/Hooks/Subagents preserved). Enterprise Gemini Code Assist tiers keep full support. The extension format carries forward — see [antigravity.md](./antigravity.md). + **Extension manifest** (`gemini-extension.json` at repo root): ```json @@ -176,6 +207,10 @@ Use `${extensionPath}` (Gemini's install-root variable) and `${/}` (cross-platfo **`plan.directory`** is the only first-class plugin-author hook for redirecting plan-mode artifacts. None of Claude/Codex/OpenCode have an equivalent — they're all user-side config. +### Antigravity CLI + +The successor to Gemini CLI. 
Reuse the **Gemini assets verbatim** — `gemini-extension.json`, `hooks/hooks.json` (`${extensionPath}`/`${/}`), `agents/*.md`, `commands/flow/*.toml`, `GEMINI.md`/`AGENTS.md`, and `skills/**/SKILL.md` (`.agents/skills/` recognized). Config hub is `~/.gemini`. No Codex `${PLUGIN_ROOT}` tokens here — Antigravity uses the Gemini token set. Verify the plugin manifest filename against live docs at release time (Google's transition docs were still landing as of June 2026). Full guide: [antigravity.md](./antigravity.md). + ### Cursor Use Cursor rules and shared project instructions: @@ -325,7 +360,8 @@ python3 -c "import json; [json.load(open(p)) for p in [ '.claude-plugin/marketplace.json', '.claude-plugin/plugin.json', '.agents/plugins/marketplace.json', '.codex-plugin/plugin.json', 'gemini-extension.json', - 'hooks/hooks.json', 'hooks/hooks-claude.json', 'hooks/hooks-cursor.json' + 'hooks/hooks.json', 'hooks/hooks-claude.json', 'hooks/hooks-codex.json', + 'hooks/hooks-cursor.json', '.codex/hooks.json' ]]" # OpenCode plugin diff --git a/gemini-extension.json b/gemini-extension.json index 19c53ad..d24eda4 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,7 +1,7 @@ { "name": "flow", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", - "version": "0.20.5", + "version": "0.21.0", "contextFileName": "GEMINI.md", "plan": { "directory": ".agents" diff --git a/hooks/detect-env.ps1 b/hooks/detect-env.ps1 index 3046cf5..ba40dc9 100644 --- a/hooks/detect-env.ps1 +++ b/hooks/detect-env.ps1 @@ -17,6 +17,7 @@ $env:BD_JSON_ENVELOPE = '1' function Get-BeadsBackend { Write-Host "## Flow Environment Context" + Write-Host "- **Flow is a SKILL, not a CLI**: there is no ``flow`` executable. NEVER run ``flow``, ``flow sync``, ``flow prd``, ``flow status``, etc. as shell commands. Invoke the Flow skill, or use the ``/flow:*`` (e.g. ``/flow:sync``) slash commands where the host supports them." 
if ($script:USE_BEADS -ne 'true') { Write-Host "- **Beads Backend**: Disabled via plugin config (useBeads=false)" return "disabled" diff --git a/hooks/detect-env.sh b/hooks/detect-env.sh index 0a8b595..f40f7f4 100755 --- a/hooks/detect-env.sh +++ b/hooks/detect-env.sh @@ -48,6 +48,7 @@ safe_run() { detect_beads() { echo "## Flow Environment Context" + echo "- **Flow is a SKILL, not a CLI**: there is no \`flow\` executable. NEVER run \`flow\`, \`flow sync\`, \`flow prd\`, \`flow status\`, etc. as shell commands. Invoke the Flow skill, or use the \`/flow:*\` (e.g. \`/flow:sync\`) slash commands where the host supports them." if [[ "${USE_BEADS}" != "true" ]]; then echo "- **Beads Backend**: Disabled via plugin config (useBeads=false)" return diff --git a/hooks/hooks-codex.json b/hooks/hooks-codex.json new file mode 100644 index 0000000..551050c --- /dev/null +++ b/hooks/hooks-codex.json @@ -0,0 +1,18 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "name": "flow-env-detection", + "type": "command", + "command": "export FLOW_HOST=codex; r=\"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}\"; bun \"$r/hooks/session-start.js\" || node \"$r/hooks/session-start.js\" || bash \"$r/hooks/session-start.sh\"", + "timeout": 30, + "description": "Detects Beads backend and project root at session start (Codex CLI). Resolves the plugin root from $PLUGIN_ROOT (canonical) or $CLAUDE_PLUGIN_ROOT (alias), falling back to cwd for the in-repo project layer. FLOW_HOST=codex forces the Codex payload shape even when neither var is set." + } + ] + } + ] + } +} diff --git a/hooks/session-start.ps1 b/hooks/session-start.ps1 index fb42025..d6c6044 100644 --- a/hooks/session-start.ps1 +++ b/hooks/session-start.ps1 @@ -19,18 +19,23 @@ function Invoke-Detection { } function Write-Schema([string]$context) { - # Mirror session-start.sh dispatch. 
Gemini also exports CLAUDE_PROJECT_DIR - # as a compat alias, so check CLAUDE_PLUGIN_ROOT (which Gemini does NOT set) - # to disambiguate Claude from Gemini. + # Mirror session-start.sh dispatch. Codex exports PLUGIN_ROOT (canonical) and + # CLAUDE_PLUGIN_ROOT (compat alias), so check the Codex-specific markers BEFORE + # the Claude branch. Gemini exports CLAUDE_PROJECT_DIR as a compat alias but + # never CLAUDE_PLUGIN_ROOT, so the Claude branch stays unambiguous. $host = 'unknown' - if ($env:CLAUDE_PLUGIN_ROOT) { + if ($env:FLOW_HOST) { + # Explicit override set by a host's hook command (e.g. Codex sets + # FLOW_HOST=codex) — authoritative when the host exports no plugin-root var. + $host = $env:FLOW_HOST + } elseif ($env:CODEX_PLUGIN_ROOT -or $env:PLUGIN_ROOT) { + $host = 'codex' + } elseif ($env:CLAUDE_PLUGIN_ROOT) { $host = 'claude' } elseif ($env:GEMINI_SESSION_ID -or $env:GEMINI_CWD -or $env:GEMINI_PROJECT_DIR) { $host = 'gemini' } elseif ($env:OPENCODE_PLUGIN_ROOT -or $env:FLOW_PLUGIN_ROOT) { $host = 'opencode' - } elseif ($env:CODEX_PLUGIN_ROOT) { - $host = 'codex' } elseif ($env:CURSOR_PLUGIN_ROOT) { $host = 'cursor' } diff --git a/hooks/session-start.sh b/hooks/session-start.sh index aa66737..e7471e7 100755 --- a/hooks/session-start.sh +++ b/hooks/session-start.sh @@ -78,18 +78,24 @@ main() { local escaped_context escaped_context=$(escape_json "${context}") - # Detect host. Priority: explicit plugin-root vars first, then Gemini's - # session marker (Gemini also exports CLAUDE_PROJECT_DIR as a compat alias, - # so we check CLAUDE_PLUGIN_ROOT — which Gemini does NOT set — to disambiguate). + # Detect host. Priority: Codex's plugin-root vars first, then Claude. + # Codex exports PLUGIN_ROOT (canonical) and CLAUDE_PLUGIN_ROOT (compat alias), + # so we must check the Codex-specific markers BEFORE the Claude branch to avoid + # misdetecting Codex as Claude. 
Gemini exports CLAUDE_PROJECT_DIR as a compat + # alias but never CLAUDE_PLUGIN_ROOT, so the Claude branch stays unambiguous. local host="unknown" - if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then + if [[ -n "${FLOW_HOST:-}" ]]; then + # Explicit override set by a host's hook command (e.g. Codex sets + # FLOW_HOST=codex) — authoritative when the host exports no plugin-root var. + host="${FLOW_HOST}" + elif [[ -n "${CODEX_PLUGIN_ROOT:-}" ]] || [[ -n "${PLUGIN_ROOT:-}" ]]; then + host="codex" + elif [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then host="claude" elif [[ -n "${GEMINI_SESSION_ID:-}" ]] || [[ -n "${GEMINI_CWD:-}" ]] || [[ -n "${GEMINI_PROJECT_DIR:-}" ]]; then host="gemini" elif [[ -n "${OPENCODE_PLUGIN_ROOT:-}" ]] || [[ -n "${FLOW_PLUGIN_ROOT:-}" ]]; then host="opencode" - elif [[ -n "${CODEX_PLUGIN_ROOT:-}" ]]; then - host="codex" elif [[ -n "${CURSOR_PLUGIN_ROOT:-}" ]]; then host="cursor" fi diff --git a/package.json b/package.json index 9b35ee1..2477763 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "flow", - "version": "0.20.5", + "version": "0.21.0", "description": "Unified toolkit for Context-Driven Development", "type": "module", "main": ".opencode/plugins/flow.js", diff --git a/plugins/flow/.codex-plugin/plugin.json b/plugins/flow/.codex-plugin/plugin.json index 322255a..66aabbf 100644 --- a/plugins/flow/.codex-plugin/plugin.json +++ b/plugins/flow/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow", - "version": "0.20.5", + "version": "0.21.0", "description": "Unified toolkit for Context-Driven Development with spec-first planning, TDD workflow, and Beads integration", "author": { "name": "cofin" }, "homepage": "https://github.com/cofin/flow", diff --git a/plugins/flow/.codex/hooks.json b/plugins/flow/.codex/hooks.json index a0da530..551050c 100644 --- a/plugins/flow/.codex/hooks.json +++ b/plugins/flow/.codex/hooks.json @@ -5,10 +5,11 @@ "matcher": "*", "hooks": [ { + "name": "flow-env-detection", "type": "command", - 
"command": "bash ./hooks/session-start.sh", + "command": "export FLOW_HOST=codex; r=\"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}\"; bun \"$r/hooks/session-start.js\" || node \"$r/hooks/session-start.js\" || bash \"$r/hooks/session-start.sh\"", "timeout": 30, - "description": "Detects Beads backend and project root at session start (Codex CLI)." + "description": "Detects Beads backend and project root at session start (Codex CLI). Resolves the plugin root from $PLUGIN_ROOT (canonical) or $CLAUDE_PLUGIN_ROOT (alias), falling back to cwd for the in-repo project layer. FLOW_HOST=codex forces the Codex payload shape even when neither var is set." } ] } diff --git a/plugins/flow/commands/flow-plan.md b/plugins/flow/commands/flow-plan.md index b96db9a..5b0b4a2 100644 --- a/plugins/flow/commands/flow-plan.md +++ b/plugins/flow/commands/flow-plan.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, Task, AskUserQuestion # Flow Plan > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. See `flow-planning` → "Interrogate Before Finalizing". ## The Planner Mandate diff --git a/plugins/flow/commands/flow-prd.md b/plugins/flow/commands/flow-prd.md index 346c404..c90b393 100644 --- a/plugins/flow/commands/flow-prd.md +++ b/plugins/flow/commands/flow-prd.md @@ -8,6 +8,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, Task, AskUserQuestion > **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. 
> > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. Do not finish the roadmap while obvious research gaps remain. See `flow-planning` → "Interrogate Before Finalizing". ## The Orchestrator Mandate diff --git a/plugins/flow/commands/flow-refine.md b/plugins/flow/commands/flow-refine.md index 3fdb330..c9a9bf0 100644 --- a/plugins/flow/commands/flow-refine.md +++ b/plugins/flow/commands/flow-refine.md @@ -9,6 +9,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash, WebSearch > **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. > > Lifecycle skill: use `flow-planning` through the `flow` router. +> +> **Grill before finalizing:** interrogate every open decision one question at a time (each with your recommended answer + trade-off), and explore the repo / `patterns.md` / `knowledge/` instead of asking when the answer is in the code. Refinement is done only when a zero-context executor could implement from the worksheet alone. See `flow-planning` → "Interrogate Before Finalizing". Refining flow: **$ARGUMENTS** diff --git a/plugins/flow/commands/flow-revert.md b/plugins/flow/commands/flow-revert.md index 9d32764..d98b2f9 100644 --- a/plugins/flow/commands/flow-revert.md +++ b/plugins/flow/commands/flow-revert.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash # Flow Revert +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. 
Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. +> > Lifecycle skill: use `flow-completion` through the `flow` router. Reverting: **$ARGUMENTS** diff --git a/plugins/flow/commands/flow-sync.md b/plugins/flow/commands/flow-sync.md index f32f563..301b53a 100644 --- a/plugins/flow/commands/flow-sync.md +++ b/plugins/flow/commands/flow-sync.md @@ -14,6 +14,12 @@ Syncing active backend state to disk for flow: **$ARGUMENTS** **CRITICAL:** `/flow:sync` is the primary bridge between the **Beads Source of Truth** and the **Markdown View**. Default setup runs it after task completion, note addition, or status changes when `syncPolicy.flowSyncAfterMutation` is enabled. +**What sync means here (and what it does NOT):** + +- `/flow:sync` **ALWAYS writes the reconciled markdown to disk** — updating **every markdown file in `.agents/specs/<flow_id>/`** (`spec.md`, `learnings.md`, and any other tracked markdown in the flow folder), not just `spec.md`, so they all match Beads exactly. This write is **mandatory**; sync is never read-only/dry-run and must never finish without persisting the markdown. +- "Sync" / "export" in Flow means **making the markdown files and Beads reflect identical reality on disk** — nothing more. +- It does **NOT** mean Dolt. **NEVER run `bd dolt` commands** (`bd dolt push`/`pull`/`export`) as part of sync, regardless of phrasing. Those are out of scope for `/flow:sync` and only run if the user explicitly and separately asks for Dolt operations. 
+ --- ## Phase 0: Environment Detection diff --git a/plugins/flow/commands/flow-validate.md b/plugins/flow/commands/flow-validate.md index da1c9c2..1f2dff3 100644 --- a/plugins/flow/commands/flow-validate.md +++ b/plugins/flow/commands/flow-validate.md @@ -5,6 +5,8 @@ allowed-tools: Read, Write, Edit, Glob, Grep, Bash # Flow Validate +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `skills/flow/references/discipline.md`. +> > Lifecycle skill: use `flow-setup` through the `flow` router. Validate Flow project integrity and optionally fix issues. diff --git a/plugins/flow/hooks/detect-env.ps1 b/plugins/flow/hooks/detect-env.ps1 index 3046cf5..ba40dc9 100644 --- a/plugins/flow/hooks/detect-env.ps1 +++ b/plugins/flow/hooks/detect-env.ps1 @@ -17,6 +17,7 @@ $env:BD_JSON_ENVELOPE = '1' function Get-BeadsBackend { Write-Host "## Flow Environment Context" + Write-Host "- **Flow is a SKILL, not a CLI**: there is no ``flow`` executable. NEVER run ``flow``, ``flow sync``, ``flow prd``, ``flow status``, etc. as shell commands. Invoke the Flow skill, or use the ``/flow:*`` (e.g. ``/flow:sync``) slash commands where the host supports them." if ($script:USE_BEADS -ne 'true') { Write-Host "- **Beads Backend**: Disabled via plugin config (useBeads=false)" return "disabled" diff --git a/plugins/flow/hooks/detect-env.sh b/plugins/flow/hooks/detect-env.sh index 0a8b595..f40f7f4 100755 --- a/plugins/flow/hooks/detect-env.sh +++ b/plugins/flow/hooks/detect-env.sh @@ -48,6 +48,7 @@ safe_run() { detect_beads() { echo "## Flow Environment Context" + echo "- **Flow is a SKILL, not a CLI**: there is no \`flow\` executable. NEVER run \`flow\`, \`flow sync\`, \`flow prd\`, \`flow status\`, etc. as shell commands. Invoke the Flow skill, or use the \`/flow:*\` (e.g. 
\`/flow:sync\`) slash commands where the host supports them." if [[ "${USE_BEADS}" != "true" ]]; then echo "- **Beads Backend**: Disabled via plugin config (useBeads=false)" return diff --git a/plugins/flow/hooks/hooks-codex.json b/plugins/flow/hooks/hooks-codex.json new file mode 100644 index 0000000..551050c --- /dev/null +++ b/plugins/flow/hooks/hooks-codex.json @@ -0,0 +1,18 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "name": "flow-env-detection", + "type": "command", + "command": "export FLOW_HOST=codex; r=\"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}\"; bun \"$r/hooks/session-start.js\" || node \"$r/hooks/session-start.js\" || bash \"$r/hooks/session-start.sh\"", + "timeout": 30, + "description": "Detects Beads backend and project root at session start (Codex CLI). Resolves the plugin root from $PLUGIN_ROOT (canonical) or $CLAUDE_PLUGIN_ROOT (alias), falling back to cwd for the in-repo project layer. FLOW_HOST=codex forces the Codex payload shape even when neither var is set." + } + ] + } + ] + } +} diff --git a/plugins/flow/hooks/hooks.json b/plugins/flow/hooks/hooks.json index 9518cbb..551050c 100644 --- a/plugins/flow/hooks/hooks.json +++ b/plugins/flow/hooks/hooks.json @@ -7,8 +7,9 @@ { "name": "flow-env-detection", "type": "command", - "command": "bun ${extensionPath}${/}hooks${/}session-start.js || node ${extensionPath}${/}hooks${/}session-start.js || bash ${extensionPath}${/}hooks${/}session-start.sh", - "description": "Detects Beads backend and project root at session start (Gemini CLI)." + "command": "export FLOW_HOST=codex; r=\"${PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-.}}\"; bun \"$r/hooks/session-start.js\" || node \"$r/hooks/session-start.js\" || bash \"$r/hooks/session-start.sh\"", + "timeout": 30, + "description": "Detects Beads backend and project root at session start (Codex CLI). 
Resolves the plugin root from $PLUGIN_ROOT (canonical) or $CLAUDE_PLUGIN_ROOT (alias), falling back to cwd for the in-repo project layer. FLOW_HOST=codex forces the Codex payload shape even when neither var is set." } ] } diff --git a/plugins/flow/hooks/session-start.ps1 b/plugins/flow/hooks/session-start.ps1 index fb42025..d6c6044 100644 --- a/plugins/flow/hooks/session-start.ps1 +++ b/plugins/flow/hooks/session-start.ps1 @@ -19,18 +19,23 @@ function Invoke-Detection { } function Write-Schema([string]$context) { - # Mirror session-start.sh dispatch. Gemini also exports CLAUDE_PROJECT_DIR - # as a compat alias, so check CLAUDE_PLUGIN_ROOT (which Gemini does NOT set) - # to disambiguate Claude from Gemini. + # Mirror session-start.sh dispatch. Codex exports PLUGIN_ROOT (canonical) and + # CLAUDE_PLUGIN_ROOT (compat alias), so check the Codex-specific markers BEFORE + # the Claude branch. Gemini exports CLAUDE_PROJECT_DIR as a compat alias but + # never CLAUDE_PLUGIN_ROOT, so the Claude branch stays unambiguous. $host = 'unknown' - if ($env:CLAUDE_PLUGIN_ROOT) { + if ($env:FLOW_HOST) { + # Explicit override set by a host's hook command (e.g. Codex sets + # FLOW_HOST=codex) — authoritative when the host exports no plugin-root var. 
+ $host = $env:FLOW_HOST + } elseif ($env:CODEX_PLUGIN_ROOT -or $env:PLUGIN_ROOT) { + $host = 'codex' + } elseif ($env:CLAUDE_PLUGIN_ROOT) { $host = 'claude' } elseif ($env:GEMINI_SESSION_ID -or $env:GEMINI_CWD -or $env:GEMINI_PROJECT_DIR) { $host = 'gemini' } elseif ($env:OPENCODE_PLUGIN_ROOT -or $env:FLOW_PLUGIN_ROOT) { $host = 'opencode' - } elseif ($env:CODEX_PLUGIN_ROOT) { - $host = 'codex' } elseif ($env:CURSOR_PLUGIN_ROOT) { $host = 'cursor' } diff --git a/plugins/flow/hooks/session-start.sh b/plugins/flow/hooks/session-start.sh index aa66737..e7471e7 100755 --- a/plugins/flow/hooks/session-start.sh +++ b/plugins/flow/hooks/session-start.sh @@ -78,18 +78,24 @@ main() { local escaped_context escaped_context=$(escape_json "${context}") - # Detect host. Priority: explicit plugin-root vars first, then Gemini's - # session marker (Gemini also exports CLAUDE_PROJECT_DIR as a compat alias, - # so we check CLAUDE_PLUGIN_ROOT — which Gemini does NOT set — to disambiguate). + # Detect host. Priority: Codex's plugin-root vars first, then Claude. + # Codex exports PLUGIN_ROOT (canonical) and CLAUDE_PLUGIN_ROOT (compat alias), + # so we must check the Codex-specific markers BEFORE the Claude branch to avoid + # misdetecting Codex as Claude. Gemini exports CLAUDE_PROJECT_DIR as a compat + # alias but never CLAUDE_PLUGIN_ROOT, so the Claude branch stays unambiguous. local host="unknown" - if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then + if [[ -n "${FLOW_HOST:-}" ]]; then + # Explicit override set by a host's hook command (e.g. Codex sets + # FLOW_HOST=codex) — authoritative when the host exports no plugin-root var. 
+ host="${FLOW_HOST}" + elif [[ -n "${CODEX_PLUGIN_ROOT:-}" ]] || [[ -n "${PLUGIN_ROOT:-}" ]]; then + host="codex" + elif [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then host="claude" elif [[ -n "${GEMINI_SESSION_ID:-}" ]] || [[ -n "${GEMINI_CWD:-}" ]] || [[ -n "${GEMINI_PROJECT_DIR:-}" ]]; then host="gemini" elif [[ -n "${OPENCODE_PLUGIN_ROOT:-}" ]] || [[ -n "${FLOW_PLUGIN_ROOT:-}" ]]; then host="opencode" - elif [[ -n "${CODEX_PLUGIN_ROOT:-}" ]]; then - host="codex" elif [[ -n "${CURSOR_PLUGIN_ROOT:-}" ]]; then host="cursor" fi diff --git a/plugins/flow/skills/alloydb-omni/SKILL.md b/plugins/flow/skills/alloydb-omni/SKILL.md deleted file mode 100644 index 263d89f..0000000 --- a/plugins/flow/skills/alloydb-omni/SKILL.md +++ /dev/null @@ -1,276 +0,0 @@ ---- -name: alloydb-omni -description: "Use when running AlloyDB Omni locally or outside GCP, configuring container deployments, Kubernetes operators, RPM installs, columnar engine tests, or local development that needs AlloyDB behavior." ---- - -# AlloyDB Omni - -## Overview - -AlloyDB Omni is the downloadable edition of AlloyDB that runs anywhere: local machines, on-premises data centers, or other cloud providers. It is distributed as a container image and includes the same query processing and columnar engine as the managed AlloyDB service. - -## Operating Layers - -Use this skill in three distinct layers: - -1. **Deploy** AlloyDB Omni on Docker, Podman, Kubernetes, or RPM-based hosts. -2. **Connect** an agent or client to the running database. -3. **Operate** the database with lifecycle, tuning, backups, diagnostics, and upgrades. - -Keep those layers separate when giving guidance. Deployment is not the same thing as agent connectivity. 
- -## Quick Reference - -### Deployment Methods - -| Method | Image | Use Case | -|---|---|---| -| Docker | `google/alloydbomni:latest` | Local development, CI | -| Podman | `google/alloydbomni:latest` | Rootless containers, RHEL | -| Kubernetes | AlloyDB Omni Operator | Production on-prem/multi-cloud | -| RPM | `alloydbomni` package | Bare metal / VM (RHEL/CentOS) | - -### Key Environment Variables - -| Variable | Purpose | Example | -|---|---|---| -| `POSTGRES_PASSWORD` | Initial superuser password (required) | `mysecretpassword` | -| `POSTGRES_DB` | Database to create on first start | `myapp` | -| `POSTGRES_USER` | Superuser name (default: `postgres`) | `postgres` | - -### Dev Workflow - -1. Start container with `docker compose up -d` -2. Connect with `psql -h localhost -U postgres` -3. Use AlloyDB features (columnar engine, ML embeddings) locally -4. Tear down with `docker compose down` (data persists in named volume) - - - -## Workflow - -### Step 1: Choose Deployment Method - -Use Docker/Podman for local development and CI. Use the Kubernetes operator for production non-GCP deployments. Use RPM for bare-metal servers. - -### Step 2: Configure Container Resources - -Set `--memory`, `--cpus`, and `--shm-size` based on workload. For development, 2 CPUs / 4GB RAM / 256MB shared memory is a reasonable starting point. - -### Step 3: Set Up Persistence - -Always use a named volume for `/var/lib/postgresql/data`. Without a volume, data is lost when the container stops. Optionally mount `./init-scripts` to `/docker-entrypoint-initdb.d` for first-run SQL. - -### Step 4: Tune PostgreSQL Parameters - -For non-trivial workloads, configure `shared_buffers` (25% of container memory), `effective_cache_size` (75%), and `work_mem` via `ALTER SYSTEM SET` or a mounted config file. - -### Step 5: Connect and Develop - -Connect via `localhost:5432`. AlloyDB Omni supports all AlloyDB features including the columnar engine, so you can test analytical queries locally. 
- - - -## Host Integration Order - -Use the lowest-admin supported path for the current host, and degrade cleanly: - -1. **Gemini CLI**: use the dedicated `alloydb-omni` extension. -2. **Other agents with MCP support**: use MCP Toolbox with the official AlloyDB Omni prebuilt config. -3. **No extension / no MCP**: fall back to Docker/Podman/Kubernetes/RPM plus `psql` and SQL guidance from this skill's references. - -Do not make the skill Gemini-only. The Gemini extension path is preferred when available, but the deployment and operational guidance in this skill must still work across other agents and plain terminal workflows. - - - -## Guardrails - -- **Always set container resource limits** — without `--memory` and `--cpus`, the container can consume all host resources and destabilize the machine -- **Always use a named volume** for data persistence — bind mounts work but named volumes are more portable and easier to manage -- **Set `shm_size` to at least 256MB** — the default 64MB is too small for PostgreSQL and causes "could not resize shared memory segment" errors -- **Never use `POSTGRES_PASSWORD` in production** — use secrets management (Docker secrets, Kubernetes secrets, or Vault) -- **Back up the data volume regularly** — use `pg_dump` or volume snapshots; there is no managed backup like GCP AlloyDB -- **Pin the image tag in CI** — `google/alloydbomni:latest` can change between runs; use a specific version tag for reproducibility - - - - - -### Validation Checkpoint - -Before delivering configurations, verify: - -- [ ] Container has explicit memory and CPU limits set -- [ ] Data directory uses a named volume, not a tmpfs or anonymous volume -- [ ] `shm_size` is set to at least 256MB -- [ ] `POSTGRES_PASSWORD` is set (container will not start without it) -- [ ] Port mapping is correct (default: 5432:5432) - - - - - -## Example - -Docker Compose for local AlloyDB Omni development: - -```yaml -# docker-compose.yml -services: - alloydb: - image: 
google/alloydbomni:latest - container_name: alloydb-omni - environment: - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-devsecret} - POSTGRES_DB: myapp - POSTGRES_USER: postgres - ports: - - "5432:5432" - volumes: - - alloydb-data:/var/lib/postgresql/data - - ./init-scripts:/docker-entrypoint-initdb.d - restart: unless-stopped - shm_size: "256m" - deploy: - resources: - limits: - cpus: "2" - memory: 4G - -volumes: - alloydb-data: -``` - -Initialization script to enable the columnar engine: - -```sql --- init-scripts/01-extensions.sql -CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS google_ml_integration; -``` - - - -## Kubernetes Operator Lifecycle - -The AlloyDB Omni Kubernetes Operator manages `DBCluster` custom resources (CRD: `dbclusters.alloydbomni.dbadmin.goog/v1`). Key lifecycle operations: - -- **HA failover**: enable automatic standby with `availabilityOptions.standby: Enabled` in `primarySpec`; the operator promotes the standby automatically on primary failure -- **Read replica scaling**: `kubectl patch dbcluster --type=merge -p '{"spec":{"readPoolSpec":{"replicas":}}}'` -- **Rolling parameter updates**: patching `primarySpec.parameters` triggers a controlled rolling restart with no data loss -- **Backup**: annotate the DBCluster with `alloydbomni.dbadmin.goog/backup=true` to trigger an immediate backup -- **Upgrades**: update `databaseVersion` or the image tag; the operator orchestrates a rolling restart - -See [references/kubernetes-operator.md](references/kubernetes-operator.md) for the full CRD spec, HA configuration YAML, scaling examples, health monitoring, and upgrade procedures. - -## RPM Lifecycle - -RPM-based AlloyDB Omni installs are a first-class deployment path for RHEL-family hosts, VMs, and bare-metal systems where containers are not the right fit. 
- -Key lifecycle operations: - -- **Install repository + package**: add the AlloyDB Omni yum repo, then `yum install alloydbomni` -- **Initialize data directory**: run `alloydb-omni init --data-dir=...` before first start -- **Manage the service**: use `systemctl enable --now alloydb-omni`, `status`, `restart`, and `journalctl` -- **Tune PostgreSQL settings**: change parameters with `ALTER SYSTEM SET ...` and restart the service -- **Upgrade in place**: update the RPM package, restart the service, and verify version + extension state -- **Back up and validate**: verify local storage, service health, and extension availability before and after upgrades - -See [references/rpm.md](references/rpm.md) for the full install, service-management, configuration, validation, and upgrade workflow. - -## Performance Diagnostics - -Key diagnostics for AlloyDB Omni production workloads: - -- **Query plans**: use `EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)` to identify sequential scans, high-cost nodes, and buffer hit ratios -- **Invalid indexes**: query `pg_class JOIN pg_index` where `indisvalid = false` to find indexes that need rebuilding with `REINDEX CONCURRENTLY` -- **Bloat detection**: query `pg_stat_user_tables` for `n_dead_tup` and `n_live_tup` ratios; tables with dead-tuple ratio above 20% are candidates for `VACUUM ANALYZE` -- **Active query monitoring**: `pg_stat_activity` filtered on `state = 'active'` and `wait_event_type` to identify lock waits and long-running queries - -See [references/performance.md](references/performance.md) for ready-to-run diagnostic queries, autovacuum tuning, and connection lifecycle management. - -## Columnar Engine Tuning - -The columnar engine accelerates analytical queries by caching selected columns in a compressed in-memory format. 
- -- **Memory limit**: set `google_columnar_engine.memory_limit` (e.g., `ALTER SYSTEM SET google_columnar_engine.memory_limit = '4GB'`) — allocate 10–25% of total container/node memory -- **Recommended columns**: add wide tables with high read frequency and low update frequency via `SELECT google_columnar_engine_add('')` or individual column-level population -- **Cost/benefit check**: compare `EXPLAIN` output before and after adding a table — look for `Custom Scan (columnar scan)` nodes replacing `Seq Scan` -- **Cache inspection**: `SELECT * FROM g_columnar_memory_usage` shows per-relation memory consumption and hit rates - -## Gemini CLI and MCP Toolbox - -This section is for the **connection layer**, not for deploying AlloyDB Omni itself. - -For AlloyDB Omni, prefer the dedicated Gemini CLI extension when Gemini is the active host. Use the generic PostgreSQL route only as a fallback when the dedicated extension is unavailable. - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb-omni --auto-update -gemini extensions config alloydb-omni --scope workspace -``` - -Guide the user through the required connection variables before starting Gemini: - -```bash -export ALLOYDB_OMNI_HOST="" -export ALLOYDB_OMNI_PORT="" -export ALLOYDB_OMNI_DATABASE="" -export ALLOYDB_OMNI_USER="" -export ALLOYDB_OMNI_PASSWORD="" -export ALLOYDB_OMNI_QUERY_PARAMS="" -``` - -Important configuration guidance: - -- Gemini CLI should be `v0.6.0` or newer. -- Load the variables from a `.env` file when possible. -- Connection settings are fixed at session start; restart Gemini to switch databases. -- Treat configuration as workspace-scoped by default, not user-global. - -For non-Gemini agents, or when the user needs a shared MCP endpoint, guide them to MCP Toolbox using the AlloyDB Omni prebuilt config rather than inventing a custom setup. 
- -For reusable project workflows, prefer generated workspace skills: - -```bash -toolbox --prebuilt alloydb-omni skills-generate \ - --name alloydb-omni-optimize \ - --toolset optimize \ - --description "AlloyDB Omni optimization skill" \ - --output-dir .agents/skills -``` - -If neither Gemini extensions nor MCP Toolbox are available, fall back to the manual Docker/Podman/Kubernetes/RPM workflows and `psql` diagnostics already documented in this skill's references. - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Setup & Deployment](references/setup.md)** - - Container deployment (Docker/Podman), Kubernetes operator, local development workflows. -- **[Configuration](references/config.md)** - - Memory/CPU tuning, persistence volumes, networking, PostgreSQL parameter overrides. -- **[Kubernetes Operator](references/kubernetes-operator.md)** - - DBCluster CRD spec, HA failover, read replica scaling, rolling updates, backup annotations, health monitoring, upgrade procedures. -- **[RPM Deployment](references/rpm.md)** - - RHEL-family installation, `systemd` lifecycle, configuration, upgrades, and operational validation. -- **[Performance Diagnostics](references/performance.md)** - - Query planning, invalid index detection, bloat analysis, active query monitoring, columnar engine tuning, autovacuum, connection lifecycle. -- **[Gemini + MCP Guidance](references/gemini-mcp.md)** - - PostgreSQL extension install, env vars, and MCP Toolbox fallback guidance for Omni workflows. - ---- - -## Official References - -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [PostgreSQL / psql](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/postgres_psql.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/alloydb-omni/agents/openai.yaml b/plugins/flow/skills/alloydb-omni/agents/openai.yaml deleted file mode 100644 index 68797c8..0000000 --- a/plugins/flow/skills/alloydb-omni/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "AlloyDB Omni" - short_description: "Containerized AlloyDB Omni setup, operators, RPMs, and local database workflows" diff --git a/plugins/flow/skills/alloydb-omni/references/config.md b/plugins/flow/skills/alloydb-omni/references/config.md deleted file mode 100644 index 28efc82..0000000 --- a/plugins/flow/skills/alloydb-omni/references/config.md +++ /dev/null @@ -1,177 +0,0 @@ -# AlloyDB Omni Configuration - -## Memory & CPU Tuning - -### Docker Resource Limits - -```bash -docker run -d \ - --name alloydb-omni \ - --cpus=4 \ - --memory=16g \ - --shm-size=256m \ - -e POSTGRES_PASSWORD=secret \ - -p 5432:5432 \ - -v alloydb-data:/var/lib/postgresql/data \ - google/alloydbomni:latest -``` - -### PostgreSQL Parameters - -```sql --- Connect and tune parameters -ALTER SYSTEM SET shared_buffers = '4GB'; -- 25% of container memory -ALTER SYSTEM SET effective_cache_size = '12GB'; -- 75% of container memory -ALTER SYSTEM SET work_mem = '64MB'; -ALTER SYSTEM SET maintenance_work_mem = '1GB'; -ALTER SYSTEM SET max_connections = 200; - --- Reload configuration -SELECT pg_reload_conf(); - --- Verify settings -SHOW shared_buffers; -SELECT name, setting, unit, source FROM pg_settings WHERE source = 'configuration file'; -``` - -### Kubernetes Resource Tuning - -```yaml -apiVersion: alloydbomni.dbadmin.goog/v1 -kind: DBCluster -metadata: - name: production-cluster -spec: - primarySpec: - resources: - cpu: 
"8" - memory: "32Gi" - parameters: - shared_buffers: "8GB" - effective_cache_size: "24GB" - work_mem: "128MB" - maintenance_work_mem: "2GB" - max_connections: "500" - random_page_cost: "1.1" - effective_io_concurrency: "200" -``` - -## Persistence Volumes - -### Docker Named Volumes - -```bash -# Create volume with specific driver options -docker volume create \ - --driver local \ - --opt type=none \ - --opt device=/mnt/ssd/alloydb \ - --opt o=bind \ - alloydb-data -``` - -### Kubernetes PVC - -```yaml -spec: - primarySpec: - persistence: - size: 500Gi - storageClass: premium-rwo # SSD-backed storage - accessModes: - - ReadWriteOnce -``` - -## Networking - -### Docker Networking - -```bash -# Create a dedicated network -docker network create alloydb-net - -# Run AlloyDB on the network -docker run -d \ - --name alloydb-omni \ - --network alloydb-net \ - -e POSTGRES_PASSWORD=secret \ - -v alloydb-data:/var/lib/postgresql/data \ - google/alloydbomni:latest - -# Other containers connect by name -docker run --network alloydb-net myapp \ - -e DATABASE_URL="postgresql://postgres:secret@alloydb-omni:5432/mydb" -``` - -### SSL/TLS Configuration - -```bash -# Mount custom SSL certificates -docker run -d \ - --name alloydb-omni \ - -e POSTGRES_PASSWORD=secret \ - -v alloydb-data:/var/lib/postgresql/data \ - -v ./certs/server.crt:/var/lib/postgresql/server.crt:ro \ - -v ./certs/server.key:/var/lib/postgresql/server.key:ro \ - -p 5432:5432 \ - google/alloydbomni:latest -``` - -```sql --- Enable SSL in PostgreSQL config -ALTER SYSTEM SET ssl = 'on'; -ALTER SYSTEM SET ssl_cert_file = '/var/lib/postgresql/server.crt'; -ALTER SYSTEM SET ssl_key_file = '/var/lib/postgresql/server.key'; -SELECT pg_reload_conf(); -``` - -## Columnar Engine Configuration - -```sql --- Enable the columnar engine extension -CREATE EXTENSION IF NOT EXISTS google_columnar_engine; - --- Add tables/columns to columnar cache -SELECT google_columnar_engine_add('analytics_events'); - --- Check columnar engine 
memory usage -SELECT * FROM g_columnar_memory_usage; - --- Configure memory limit for columnar engine -ALTER SYSTEM SET google_columnar_engine.memory_limit = '4GB'; -SELECT pg_reload_conf(); -``` - -## Logging - -```sql --- Configure query logging -ALTER SYSTEM SET log_min_duration_statement = 1000; -- Log queries > 1s -ALTER SYSTEM SET log_statement = 'ddl'; -- Log DDL statements -ALTER SYSTEM SET log_connections = 'on'; -ALTER SYSTEM SET log_disconnections = 'on'; -SELECT pg_reload_conf(); -``` - -```bash -# View container logs -docker logs -f alloydb-omni - -# Kubernetes logs -kubectl logs -f pod/my-omni-cluster-0 -``` - -## Health Checks - -```yaml -# docker-compose.yml health check -services: - alloydb: - image: google/alloydbomni:latest - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 30s -``` diff --git a/plugins/flow/skills/alloydb-omni/references/gemini-mcp.md b/plugins/flow/skills/alloydb-omni/references/gemini-mcp.md deleted file mode 100644 index bd9b381..0000000 --- a/plugins/flow/skills/alloydb-omni/references/gemini-mcp.md +++ /dev/null @@ -1,58 +0,0 @@ -# Gemini CLI and MCP Toolbox - -## Agent Integration Order - -1. Gemini extension path -2. MCP Toolbox path for other agents or shared MCP setups -3. Manual Omni deployment plus `psql` fallback - -This keeps the skill usable across hosts instead of tying it to Gemini only. - -## Preferred Path for Gemini - -For AlloyDB Omni, use the dedicated Gemini CLI extension when available: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb-omni --auto-update -gemini extensions config alloydb-omni --scope workspace -``` - -The dedicated Omni extension exposes workspace-configured host/user/password settings and is a better default than treating every Omni workflow as generic PostgreSQL. 
- -## Environment Configuration - -```bash -export ALLOYDB_OMNI_HOST="" -export ALLOYDB_OMNI_PORT="" -export ALLOYDB_OMNI_DATABASE="" -export ALLOYDB_OMNI_USER="" -export ALLOYDB_OMNI_PASSWORD="" -export ALLOYDB_OMNI_QUERY_PARAMS="" -``` - -Notes: - -- Gemini CLI should be `v0.6.0+`. -- Load these values from a `.env` file when possible. -- Restart Gemini when switching databases or credentials. -- Keep this config at workspace scope by default. - -## MCP Toolbox Fallback - -For other LLMs or shared MCP setups, use the official AlloyDB Omni prebuilt config via Toolbox. - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/mcp-toolbox --auto-update -``` - -For reusable workspace automation, generate project-local skills: - -```bash -toolbox --prebuilt alloydb-omni skills-generate \ - --name alloydb-omni-optimize \ - --toolset optimize \ - --description "AlloyDB Omni optimization skill" \ - --output-dir .agents/skills -``` - -If Toolbox is unavailable, fall back to the Docker, Kubernetes, RPM, and performance references in this skill. diff --git a/plugins/flow/skills/alloydb-omni/references/kubernetes-operator.md b/plugins/flow/skills/alloydb-omni/references/kubernetes-operator.md deleted file mode 100644 index b4fc251..0000000 --- a/plugins/flow/skills/alloydb-omni/references/kubernetes-operator.md +++ /dev/null @@ -1,324 +0,0 @@ -# AlloyDB Omni Kubernetes Operator - -## DBCluster CRD Spec - -The `DBCluster` custom resource is the primary API object managed by the AlloyDB Omni operator. - -**CRD group/version/kind:** `alloydbomni.dbadmin.goog/v1 / DBCluster` - -### Full Field Reference - -```yaml -apiVersion: alloydbomni.dbadmin.goog/v1 -kind: DBCluster -metadata: - name: - namespace: - annotations: {} # operator-recognized annotations (e.g. 
backup trigger) -spec: - databaseVersion: "15" # PostgreSQL major version - - primarySpec: - adminUser: - passwordRef: - name: # Secret key: db-password - resources: - cpu: "4" # vCPU request/limit - memory: "16Gi" # Memory request/limit - parameters: # PostgreSQL GUC overrides (ALTER SYSTEM equivalent) - max_connections: "200" - shared_buffers: "4GB" - effective_cache_size: "12GB" - work_mem: "64MB" - maintenance_work_mem: "1GB" - random_page_cost: "1.1" - effective_io_concurrency: "200" - availabilityOptions: - livenessProbe: Enabled # HTTP liveness check on the primary pod - standby: Disabled # Set to Enabled for HA automatic failover - persistence: - size: 100Gi - storageClass: standard-rwo - accessModes: - - ReadWriteOnce - - readPoolSpec: # Omit this section if read replicas are not needed - replicas: 2 - resources: - cpu: "2" - memory: "8Gi" - parameters: - max_connections: "200" - persistence: - size: 100Gi - storageClass: standard-rwo -``` - -### Secret for Admin Password - -```yaml -apiVersion: v1 -kind: Secret -metadata: - name: - namespace: -type: Opaque -stringData: - db-password: -``` - ---- - -## HA Failover Configuration - -Enable automatic high availability by setting `standby: Enabled` in `availabilityOptions`. The operator provisions a synchronous standby pod in a different availability zone (when the cluster spans zones) and promotes it automatically if the primary becomes unavailable. 
- -```yaml -apiVersion: alloydbomni.dbadmin.goog/v1 -kind: DBCluster -metadata: - name: ha-cluster - namespace: database -spec: - databaseVersion: "15" - primarySpec: - adminUser: - passwordRef: - name: db-password-secret - resources: - cpu: "4" - memory: "16Gi" - availabilityOptions: - livenessProbe: Enabled - standby: Enabled # Activates automatic failover - persistence: - size: 200Gi - storageClass: standard-rwo - readPoolSpec: - replicas: 2 - resources: - cpu: "2" - memory: "8Gi" -``` - -### Verifying HA Status - -```bash -# Check cluster condition for HA readiness -kubectl get dbcluster ha-cluster -n database -o jsonpath='{.status.conditions}' | jq . - -# Describe to see standby pod references and replication lag -kubectl describe dbcluster ha-cluster -n database - -# Confirm both primary and standby pods are Running -kubectl get pods -n database -l dbcluster=ha-cluster -``` - -### Manual Failover Trigger (Testing) - -```bash -# Delete the primary pod to trigger operator-managed promotion -kubectl delete pod ha-cluster-0 -n database - -# Watch the operator promote the standby -kubectl get pods -n database -w -kubectl describe dbcluster ha-cluster -n database | grep -A5 "Conditions:" -``` - ---- - -## Scaling Read Replicas - -Scale read replicas up or down with a single patch. The operator adds or removes replica pods without touching the primary. - -```bash -# Scale to 3 read replicas -kubectl patch dbcluster -n \ - --type=merge \ - -p '{"spec":{"readPoolSpec":{"replicas":3}}}' - -# Scale to 0 (remove all read replicas) -kubectl patch dbcluster -n \ - --type=merge \ - -p '{"spec":{"readPoolSpec":{"replicas":0}}}' - -# Watch replica pod creation -kubectl get pods -n -w - -# Verify the read service endpoint is updated -kubectl get endpoints -n | grep "\-ro" -``` - -### Read vs. 
Read-Write Services - -The operator creates two Services automatically: - -| Service suffix | Routes to | Use for | -|---|---|---| -| `-rw` | Primary only | All writes and read-after-write queries | -| `-ro` | Read replicas | Read-only analytical or reporting queries | - -```bash -kubectl get svc -n -# NAME TYPE CLUSTER-IP PORT(S) -# -rw ClusterIP 10.x.x.x 5432/TCP -# -ro ClusterIP 10.x.x.x 5432/TCP -``` - ---- - -## Rolling Update Patterns - -### Parameter Updates (No Downtime) - -Patching `primarySpec.parameters` causes the operator to apply parameters via `ALTER SYSTEM` and perform a coordinated rolling restart of the primary and replicas. - -```bash -# Update max_connections (triggers rolling restart) -kubectl patch dbcluster -n \ - --type=merge \ - -p '{"spec":{"primarySpec":{"parameters":{"max_connections":"300","work_mem":"128MB"}}}}' - -# Monitor the rolling restart -kubectl rollout status statefulset/ -n -kubectl get pods -n -w -``` - -### Resource Updates - -```bash -# Increase CPU and memory -kubectl patch dbcluster -n \ - --type=merge \ - -p '{"spec":{"primarySpec":{"resources":{"cpu":"8","memory":"32Gi"}}}}' -``` - -### Monitoring Operator Reconciliation - -```bash -# Stream operator logs during a change -kubectl logs -n alloydb-omni-system deployment/alloydb-omni-operator -f - -# Check DBCluster conditions for reconciliation status -kubectl get dbcluster -n -o yaml | grep -A 20 "conditions:" -``` - ---- - -## Backup Annotation Patterns - -The operator supports on-demand backup via an annotation. Backups are stored according to the backup configuration in the DBCluster spec (if configured) or the operator's default storage location. 
- -```bash -# Trigger an immediate backup -kubectl annotate dbcluster -n \ - alloydbomni.dbadmin.goog/backup=true --overwrite - -# List backup objects (if CRD is available) -kubectl get dbbackups -n - -# Describe a specific backup -kubectl describe dbbackup -n -``` - -### Scheduled Backup Configuration - -```yaml -spec: - primarySpec: - backupConfiguration: - enabled: true - schedule: "0 2 * * *" # Daily at 02:00 UTC - retainedBackups: 7 # Keep last 7 backups - location: gs:///backups # GCS bucket or other configured backend -``` - ---- - -## Health Monitoring - -### Readiness and Liveness Probes - -The operator injects probes into the database pods automatically when `livenessProbe: Enabled` is set. For custom probe configuration via the underlying StatefulSet: - -```bash -# Check probe status on the primary pod -kubectl describe pod -0 -n | grep -A 15 "Liveness:\|Readiness:" - -# Check events for probe failures -kubectl get events -n --sort-by='.lastTimestamp' | grep -i "liveness\|readiness\|unhealthy" -``` - -### Cluster Health via pg_isready - -```bash -# Direct readiness check via port-forward -kubectl port-forward svc/-rw -n 5432:5432 & -pg_isready -h localhost -U postgres -kill %1 -``` - -### Operator-Level Health - -```bash -# Check the operator deployment health -kubectl get deployment alloydb-omni-operator -n alloydb-omni-system -kubectl describe deployment alloydb-omni-operator -n alloydb-omni-system - -# Check operator metrics (if metrics endpoint is exposed) -kubectl port-forward deployment/alloydb-omni-operator -n alloydb-omni-system 8080:8080 & -curl -s http://localhost:8080/metrics | grep dbcluster -``` - -### DBCluster Status Conditions - -```bash -# Full status output -kubectl get dbcluster -n -o jsonpath='{.status}' | jq . 
- -# Key conditions to check: -# Ready: True — cluster is fully operational -# Reconciling: False — no pending changes -# Degraded: False — no pod failures -``` - ---- - -## Upgrade Procedures - -### Minor Version / Image Tag Update - -```bash -# Update the database version field (triggers rolling upgrade) -kubectl patch dbcluster -n \ - --type=merge \ - -p '{"spec":{"databaseVersion":"15.5"}}' - -# Watch pods restart in order (replicas first, then primary) -kubectl get pods -n -w - -# Verify version after upgrade -kubectl port-forward svc/-rw -n 5432:5432 & -psql -h localhost -U postgres -c "SELECT version();" -``` - -### Operator Upgrade - -```bash -# Apply the new operator manifest (replaces the existing operator deployment) -kubectl apply -f https://storage.googleapis.com/alloydb-omni-operator//alloydb-omni-operator.yaml - -# Wait for the operator pod to roll over -kubectl rollout status deployment/alloydb-omni-operator -n alloydb-omni-system - -# Confirm CRD schema was updated -kubectl get crd dbclusters.alloydbomni.dbadmin.goog -o jsonpath='{.spec.versions[*].name}' -``` - -### Pre-Upgrade Checklist - -- [ ] Confirm current cluster status is `Ready` and not `Reconciling` -- [ ] Take a manual backup annotation before upgrading -- [ ] Review operator release notes for CRD schema changes -- [ ] Test the upgrade on a non-production cluster first -- [ ] Verify replication lag is 0 before upgrading the primary pod diff --git a/plugins/flow/skills/alloydb-omni/references/performance.md b/plugins/flow/skills/alloydb-omni/references/performance.md deleted file mode 100644 index 81c2410..0000000 --- a/plugins/flow/skills/alloydb-omni/references/performance.md +++ /dev/null @@ -1,464 +0,0 @@ -# AlloyDB Omni Performance Diagnostics - -## Query Planning with EXPLAIN ANALYZE - -### Basic Query Analysis - -```sql --- Full analysis with buffer and timing detail -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) -SELECT * FROM orders WHERE customer_id = 42 AND status = 'pending'; - --- 
Output key fields to check: --- Seq Scan vs Index Scan → missing index if Seq Scan on large table --- Rows=N (actual vs estimated) → stale statistics if large discrepancy --- Buffers: hit=N read=N → low hit ratio = working set exceeds shared_buffers --- Execution Time → wall-clock time to run the query; planning is reported separately as Planning Time -``` - -### Identifying Sequential Scans on Large Tables - -```sql --- Find tables with high sequential scan counts (candidates for new indexes) -SELECT - relname AS table_name, - seq_scan, - seq_tup_read, - idx_scan, - idx_tup_fetch, - ROUND(seq_tup_read::numeric / NULLIF(seq_scan, 0), 0) AS avg_rows_per_seq_scan -FROM pg_stat_user_tables -WHERE seq_scan > 0 -ORDER BY seq_tup_read DESC -LIMIT 20; -``` - -### Join and Sort Diagnostics - -```sql --- Enable timing for the current session (dev/staging only) -SET track_io_timing = on; - --- Identify sort spill to disk -EXPLAIN (ANALYZE, BUFFERS) -SELECT customer_id, COUNT(*) FROM orders GROUP BY customer_id ORDER BY 2 DESC; --- Look for: "Sort Method: external merge Disk: NNkB" --- Fix: increase work_mem for the session or globally - --- Temporarily increase work_mem for a single query -SET work_mem = '256MB'; -EXPLAIN (ANALYZE, BUFFERS) -SELECT ...; -RESET work_mem; -``` - ---- - -## Invalid Index Detection - -Invalid indexes occur after a failed `CREATE INDEX CONCURRENTLY` or a failed `REINDEX CONCURRENTLY`. They consume storage but are not used by the planner. 
- -```sql --- Detect all invalid indexes -SELECT - n.nspname AS schema_name, - c.relname AS table_name, - i.relname AS index_name, - pg_size_pretty(pg_relation_size(i.oid)) AS index_size -FROM pg_index x -JOIN pg_class c ON c.oid = x.indrelid -JOIN pg_class i ON i.oid = x.indexrelid -JOIN pg_namespace n ON n.oid = c.relnamespace -WHERE x.indisvalid = false -ORDER BY pg_relation_size(i.oid) DESC; -``` - -### Rebuilding Invalid Indexes - -```sql --- Rebuild without locking writes (preferred for production) -REINDEX INDEX CONCURRENTLY schema_name.index_name; - --- Rebuild all indexes on a table concurrently -REINDEX TABLE CONCURRENTLY schema_name.table_name; - --- Drop and recreate if REINDEX CONCURRENTLY is not viable -DROP INDEX CONCURRENTLY schema_name.index_name; -CREATE INDEX CONCURRENTLY index_name ON schema_name.table_name (column_name); -``` - -### Unused Index Detection - -```sql --- Find indexes that have never been used since last stats reset -SELECT - n.nspname AS schema_name, - t.relname AS table_name, - i.relname AS index_name, - pg_size_pretty(pg_relation_size(i.oid)) AS index_size, - s.idx_scan AS times_used -FROM pg_index x -JOIN pg_class t ON t.oid = x.indrelid -JOIN pg_class i ON i.oid = x.indexrelid -JOIN pg_namespace n ON n.oid = t.relnamespace -LEFT JOIN pg_stat_user_indexes s ON s.indexrelid = x.indexrelid -WHERE x.indisunique = false - AND x.indisprimary = false - AND s.idx_scan = 0 -ORDER BY pg_relation_size(i.oid) DESC; -``` - ---- - -## Bloat Detection - -### Dead Tuple Ratio (pg_stat_user_tables) - -```sql --- Tables with high dead-tuple ratio (candidates for VACUUM) -SELECT - schemaname, - relname AS table_name, - n_live_tup, - n_dead_tup, - ROUND( - 100.0 * n_dead_tup / NULLIF(n_live_tup + n_dead_tup, 0), 2 - ) AS dead_tuple_pct, - last_vacuum, - last_autovacuum, - last_analyze, - last_autoanalyze -FROM pg_stat_user_tables -WHERE n_dead_tup > 1000 -ORDER BY dead_tuple_pct DESC -LIMIT 20; - --- Manually trigger VACUUM ANALYZE on a bloated 
table -VACUUM (ANALYZE, VERBOSE) schema_name.table_name; -``` - -### Estimating Physical Table Bloat - -```sql --- Estimate wasted storage from table bloat (requires pg_class access) -SELECT - n.nspname AS schema_name, - c.relname AS table_name, - pg_size_pretty(pg_total_relation_size(c.oid)) AS total_size, - pg_size_pretty(pg_relation_size(c.oid)) AS table_size, - pg_size_pretty( - pg_total_relation_size(c.oid) - pg_relation_size(c.oid) - ) AS index_size, - c.reltuples::bigint AS estimated_rows -FROM pg_class c -JOIN pg_namespace n ON n.oid = c.relnamespace -WHERE c.relkind = 'r' - AND n.nspname NOT IN ('pg_catalog', 'information_schema') -ORDER BY pg_total_relation_size(c.oid) DESC -LIMIT 20; -``` - -### Index Bloat Estimation - -```sql --- Approximate index bloat via pg_stat_user_indexes + pg_class -SELECT - s.schemaname, - s.relname AS table_name, - s.indexrelname AS index_name, - pg_size_pretty(pg_relation_size(s.indexrelid)) AS index_size, - s.idx_scan, - s.idx_tup_read, - s.idx_tup_fetch -FROM pg_stat_user_indexes s -JOIN pg_index i ON i.indexrelid = s.indexrelid -WHERE NOT i.indisprimary -ORDER BY pg_relation_size(s.indexrelid) DESC -LIMIT 20; -``` - ---- - -## Active Query Monitoring - -### Current Active Queries - -```sql --- All non-idle queries with runtime -SELECT - pid, - usename, - application_name, - state, - wait_event_type, - wait_event, - NOW() - query_start AS duration, - LEFT(query, 200) AS query_snippet -FROM pg_stat_activity -WHERE state != 'idle' - AND pid != pg_backend_pid() -ORDER BY duration DESC NULLS LAST; -``` - -### Long-Running Queries - -```sql --- Queries running longer than 30 seconds -SELECT - pid, - usename, - state, - wait_event_type, - wait_event, - NOW() - query_start AS duration, - LEFT(query, 300) AS query_snippet -FROM pg_stat_activity -WHERE state = 'active' - AND query_start < NOW() - INTERVAL '30 seconds' -ORDER BY duration DESC; - --- Terminate a specific long-running query (graceful) -SELECT pg_cancel_backend(); - --- 
Force-terminate (use as last resort) -SELECT pg_terminate_backend(); -``` - -### Lock Waits - -```sql --- Sessions waiting on locks with blocking query info -SELECT - blocked.pid AS blocked_pid, - blocked.usename AS blocked_user, - blocked.query AS blocked_query, - blocking.pid AS blocking_pid, - blocking.usename AS blocking_user, - blocking.query AS blocking_query, - NOW() - blocked.query_start AS wait_duration -FROM pg_stat_activity blocked -JOIN pg_stat_activity blocking - ON blocking.pid = ANY(pg_blocking_pids(blocked.pid)) -WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0; -``` - ---- - -## Columnar Engine: Memory Limit Tuning - -### Setting the Memory Limit - -The columnar engine caches column data in a dedicated memory pool separate from `shared_buffers`. Set the limit based on the data volume of analytical tables and available host memory. - -```sql --- View current memory limit -SHOW google_columnar_engine.memory_limit; - --- Set memory limit (recommended: 10–25% of total memory, minimum 1GB) -ALTER SYSTEM SET google_columnar_engine.memory_limit = '4GB'; -SELECT pg_reload_conf(); - --- Inspect current columnar cache utilization -SELECT - relation_name, - pg_size_pretty(memory_usage_bytes) AS memory_used, - total_columns_cached, - hit_count, - miss_count, - ROUND( - 100.0 * hit_count / NULLIF(hit_count + miss_count, 0), 2 - ) AS hit_pct -FROM g_columnar_memory_usage -ORDER BY memory_usage_bytes DESC; -``` - -### Recommended Columns Analysis - -Identify tables/columns that benefit most from columnar caching: - -```sql --- Tables with high sequential scan volume and many columns (wide analytical tables) -SELECT - t.relname AS table_name, - COUNT(a.attnum) AS column_count, - s.seq_scan, - s.seq_tup_read, - pg_size_pretty(pg_total_relation_size(t.oid)) AS total_size -FROM pg_class t -JOIN pg_namespace n ON n.oid = t.relnamespace -JOIN pg_stat_user_tables s ON s.relname = t.relname AND s.schemaname = n.nspname -JOIN pg_attribute a ON a.attrelid = 
t.oid AND a.attnum > 0 AND NOT a.attisdropped -WHERE n.nspname NOT IN ('pg_catalog', 'information_schema') - AND t.relkind = 'r' - AND s.seq_scan > 100 -GROUP BY t.relname, t.oid, s.seq_scan, s.seq_tup_read -ORDER BY s.seq_tup_read DESC -LIMIT 20; - --- Add a table to the columnar cache -SELECT google_columnar_engine_add('schema_name.table_name'); - --- Add specific columns only -SELECT google_columnar_engine_add('schema_name.table_name', 'col1,col2,col3'); - --- Remove a table from the columnar cache -SELECT google_columnar_engine_delete('schema_name.table_name'); -``` - -### Cost/Benefit Verification - -```sql --- Before adding to columnar cache: capture baseline plan -EXPLAIN (ANALYZE, BUFFERS) -SELECT SUM(amount), category FROM orders WHERE created_at > NOW() - INTERVAL '90 days' -GROUP BY category; - --- Add table to columnar cache -SELECT google_columnar_engine_add('orders'); - --- After: verify columnar scan is used -EXPLAIN (ANALYZE, BUFFERS) -SELECT SUM(amount), category FROM orders WHERE created_at > NOW() - INTERVAL '90 days' -GROUP BY category; --- Look for: "Custom Scan (columnar scan)" replacing "Seq Scan" -``` - ---- - -## Autovacuum Configuration Per-Table - -### Override Autovacuum Settings for High-Churn Tables - -```sql --- Aggressive autovacuum for a high-write table -ALTER TABLE schema_name.high_churn_table SET ( - autovacuum_vacuum_scale_factor = 0.01, -- vacuum when 1% rows are dead (default 0.2) - autovacuum_analyze_scale_factor = 0.005, -- analyze when 0.5% rows change (default 0.1) - autovacuum_vacuum_cost_delay = 2, -- ms between vacuum I/O bursts (default 20) - autovacuum_vacuum_threshold = 50 -- minimum dead rows before vacuum fires -); - --- Disable autovacuum on a table managed by manual VACUUM (e.g., append-only) -ALTER TABLE schema_name.append_only_table SET (autovacuum_enabled = false); - --- View per-table autovacuum settings -SELECT - relname, - reloptions -FROM pg_class -WHERE reloptions IS NOT NULL - AND relkind = 'r' -ORDER BY 
relname; -``` - -### Global Autovacuum Tuning - -```sql --- For environments with many tables or high write throughput -ALTER SYSTEM SET autovacuum_max_workers = 6; -- default 3 -ALTER SYSTEM SET autovacuum_vacuum_cost_delay = '2ms'; -- default 20ms -ALTER SYSTEM SET autovacuum_naptime = '30s'; -- default 60s -SELECT pg_reload_conf(); - --- Check autovacuum worker activity -SELECT pid, usename, query, NOW() - query_start AS duration -FROM pg_stat_activity -WHERE query LIKE 'autovacuum:%' -ORDER BY duration DESC; -``` - ---- - -## Connection Lifecycle and Session Management - -### Connection Pool Sizing - -```sql --- View current connection counts by state -SELECT - state, - COUNT(*) AS connections, - MAX(NOW() - state_change) AS max_age_in_state -FROM pg_stat_activity -GROUP BY state -ORDER BY connections DESC; - --- View connections per database -SELECT - datname, - COUNT(*) AS connections, - MAX(NOW() - backend_start) AS oldest_connection -FROM pg_stat_activity -GROUP BY datname -ORDER BY connections DESC; - --- Check max_connections limit -SHOW max_connections; -SELECT COUNT(*) FROM pg_stat_activity; -- current usage -``` - -### Idle Connection Cleanup - -```sql --- Find long-idle connections (candidates for termination or pool timeout tuning) -SELECT - pid, - usename, - application_name, - state, - NOW() - state_change AS idle_duration, - NOW() - backend_start AS session_age -FROM pg_stat_activity -WHERE state = 'idle' - AND state_change < NOW() - INTERVAL '10 minutes' -ORDER BY idle_duration DESC; - --- Configure idle session timeout (kills sessions idle for longer than N ms) -ALTER SYSTEM SET idle_session_timeout = '10min'; -ALTER SYSTEM SET idle_in_transaction_session_timeout = '5min'; -- kills idle-in-xact -SELECT pg_reload_conf(); -``` - -### Prepared Transaction Cleanup - -```sql --- Detect forgotten prepared transactions (can block vacuum and autovacuum) -SELECT - gid, - prepared, - owner, - database, - NOW() - prepared AS age -FROM pg_prepared_xacts 
-ORDER BY prepared; - --- Roll back a stuck prepared transaction -ROLLBACK PREPARED ''; -``` - -### Session-Level Diagnostics - -```sql --- Memory usage per backend (requires pg_backend_memory_contexts — AlloyDB Omni 15+) -SELECT - pid, - context_name, - pg_size_pretty(used_bytes) AS used, - pg_size_pretty(free_bytes) AS free -FROM pg_backend_memory_contexts -WHERE pid = -ORDER BY used_bytes DESC; - --- Check for waiting backends and their lock types -SELECT - pid, - locktype, - relation::regclass, - mode, - granted, - NOW() - query_start AS wait_time -FROM pg_locks l -JOIN pg_stat_activity a USING (pid) -WHERE NOT granted -ORDER BY wait_time DESC NULLS LAST; -``` diff --git a/plugins/flow/skills/alloydb-omni/references/rpm.md b/plugins/flow/skills/alloydb-omni/references/rpm.md deleted file mode 100644 index 22176da..0000000 --- a/plugins/flow/skills/alloydb-omni/references/rpm.md +++ /dev/null @@ -1,94 +0,0 @@ -# RPM Deployment - -## Use When - -Use the RPM path for: - -- RHEL, Rocky Linux, AlmaLinux, CentOS Stream, or Oracle Linux hosts -- VM and bare-metal deployments without container orchestration -- Environments where `systemd` lifecycle management is preferred over Docker or Kubernetes - -## Install - -```bash -sudo tee /etc/yum.repos.d/alloydb-omni.repo << 'EOF' -[alloydb-omni] -name=AlloyDB Omni -baseurl=https://storage.googleapis.com/alloydb-omni-yum/ -enabled=1 -gpgcheck=0 -EOF - -sudo yum install -y alloydbomni -``` - -## Initialize - -```bash -sudo mkdir -p /var/lib/alloydb/data -sudo alloydb-omni init --data-dir=/var/lib/alloydb/data -``` - -## Service Lifecycle - -```bash -sudo systemctl enable --now alloydb-omni -sudo systemctl status alloydb-omni -sudo journalctl -u alloydb-omni -f -``` - -## Post-Install Configuration - -Primary config file: - -```text -/var/lib/alloydb/data/postgresql.conf -``` - -Recommended first-pass tuning: - -```bash -sudo -u postgres psql -c "ALTER SYSTEM SET shared_buffers = '4GB';" -sudo -u postgres psql -c "ALTER SYSTEM 
SET effective_cache_size = '12GB';" -sudo -u postgres psql -c "ALTER SYSTEM SET work_mem = '64MB';" -sudo -u postgres psql -c "ALTER SYSTEM SET maintenance_work_mem = '1GB';" -sudo systemctl restart alloydb-omni -``` - -Enable key extensions after restart: - -```bash -sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS google_columnar_engine;" -sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS vector;" -``` - -## Validation - -```bash -psql -h localhost -U postgres -c "SELECT version();" -sudo systemctl is-active alloydb-omni -sudo -u postgres psql -c "SELECT extname FROM pg_extension ORDER BY extname;" -``` - -Verify: - -- service is active after boot -- data lives on persistent local storage -- PostgreSQL parameters applied after restart -- required extensions are available - -## Upgrades - -```bash -psql -h localhost -U postgres -c "SELECT version();" -sudo yum update -y alloydbomni -sudo systemctl restart alloydb-omni -psql -h localhost -U postgres -c "SELECT version();" -``` - -After upgrades: - -- confirm the service restarted cleanly -- re-check extension availability -- review `journalctl -u alloydb-omni` for startup warnings -- run representative health and query checks before handing the system back to users diff --git a/plugins/flow/skills/alloydb-omni/references/setup.md b/plugins/flow/skills/alloydb-omni/references/setup.md deleted file mode 100644 index 7b8dd29..0000000 --- a/plugins/flow/skills/alloydb-omni/references/setup.md +++ /dev/null @@ -1,326 +0,0 @@ -# AlloyDB Omni Setup & Deployment - -## Docker Deployment - -```bash -# Pull the AlloyDB Omni image -docker pull google/alloydbomni:latest - -# Run with persistent storage -docker run -d \ - --name alloydb-omni \ - -e POSTGRES_PASSWORD=mysecretpassword \ - -p 5432:5432 \ - -v alloydb-data:/var/lib/postgresql/data \ - google/alloydbomni:latest - -# Connect -psql -h localhost -U postgres -``` - -### Docker Compose - -```yaml -# docker-compose.yml -services: - alloydb: - image: 
google/alloydbomni:latest - container_name: alloydb-omni - environment: - POSTGRES_PASSWORD: mysecretpassword - POSTGRES_DB: myapp - POSTGRES_USER: postgres - ports: - - "5432:5432" - volumes: - - alloydb-data:/var/lib/postgresql/data - - ./init-scripts:/docker-entrypoint-initdb.d - restart: unless-stopped - shm_size: '256m' - -volumes: - alloydb-data: -``` - -## Podman Deployment - -```bash -# Run with Podman (rootless) -podman run -d \ - --name alloydb-omni \ - -e POSTGRES_PASSWORD=mysecretpassword \ - -p 5432:5432 \ - -v alloydb-data:/var/lib/postgresql/data:Z \ - google/alloydbomni:latest -``` - -## RPM Installation (Bare Metal / VM) - -For RHEL, CentOS, Rocky Linux, or Oracle Linux deployments without containers. - -```bash -# Add the AlloyDB Omni repository -sudo tee /etc/yum.repos.d/alloydb-omni.repo << 'EOF' -[alloydb-omni] -name=AlloyDB Omni -baseurl=https://storage.googleapis.com/alloydb-omni-yum/ -enabled=1 -gpgcheck=0 -EOF - -# Install AlloyDB Omni -sudo yum install -y alloydbomni - -# Initialize the database cluster -sudo alloydb-omni init --data-dir=/var/lib/alloydb/data - -# Start the service -sudo systemctl enable --now alloydb-omni - -# Verify -sudo systemctl status alloydb-omni -psql -h localhost -U postgres -``` - -### RPM Configuration - -```bash -# Configuration file location -/var/lib/alloydb/data/postgresql.conf - -# Key settings to tune after install -sudo -u postgres psql -c "ALTER SYSTEM SET shared_buffers = '4GB';" -sudo -u postgres psql -c "ALTER SYSTEM SET effective_cache_size = '12GB';" -sudo -u postgres psql -c "ALTER SYSTEM SET work_mem = '64MB';" -sudo -u postgres psql -c "ALTER SYSTEM SET maintenance_work_mem = '1GB';" - -# Restart to apply -sudo systemctl restart alloydb-omni - -# Enable extensions -sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS google_columnar_engine;" -sudo -u postgres psql -c "CREATE EXTENSION IF NOT EXISTS vector;" -``` - -### Upgrading RPM Installation - -```bash -# Check current version -psql -h 
localhost -U postgres -c "SELECT version();" - -# Update package -sudo yum update -y alloydbomni - -# Restart -sudo systemctl restart alloydb-omni -``` - -## Kubernetes Operator - -### Prerequisites - -- Kubernetes 1.25+ cluster (GKE Autopilot, standard GKE, EKS, AKS, or on-prem) -- `kubectl` configured with cluster admin access -- cert-manager installed (the operator depends on it for webhook TLS) - -```bash -# Install cert-manager if not present -kubectl apply -f https://github.com/cert-manager/cert-manager/releases/latest/download/cert-manager.yaml -kubectl wait --for=condition=available --timeout=120s \ - deployment/cert-manager -n cert-manager -``` - -### Install the Operator - -```bash -# Install the AlloyDB Omni Kubernetes operator -kubectl apply -f https://storage.googleapis.com/alloydb-omni-operator/latest/alloydb-omni-operator.yaml - -# Wait for operator to be ready -kubectl wait --for=condition=available --timeout=120s \ - deployment/alloydb-omni-operator -n alloydb-omni-system - -# Verify CRDs are registered -kubectl get crd | grep alloydbomni -# Expected: dbclusters.alloydbomni.dbadmin.goog -``` - -### DBCluster Custom Resource - -```yaml -# alloydb-cluster.yaml -apiVersion: alloydbomni.dbadmin.goog/v1 -kind: DBCluster -metadata: - name: my-omni-cluster - namespace: database -spec: - databaseVersion: "15" - primarySpec: - adminUser: - passwordRef: - name: db-password-secret - resources: - cpu: "4" - memory: "16Gi" - parameters: - max_connections: "200" - shared_buffers: "4GB" - work_mem: "64MB" - availabilityOptions: - livenessProbe: Enabled - persistence: - size: 100Gi - storageClass: standard-rwo - # Optional: read pool for scaling reads - readPoolSpec: - replicas: 2 - resources: - cpu: "2" - memory: "8Gi" ---- -apiVersion: v1 -kind: Secret -metadata: - name: db-password-secret - namespace: database -type: Opaque -stringData: - db-password: mysecretpassword -``` - -```bash -# Create namespace and apply -kubectl create namespace database -kubectl 
apply -f alloydb-cluster.yaml - -# Check status -kubectl get dbclusters -n database -kubectl describe dbcluster my-omni-cluster -n database - -# Watch for readiness -kubectl wait --for=condition=Ready --timeout=300s \ - dbcluster/my-omni-cluster -n database -``` - -### Connecting to the Operator-Managed Cluster - -```bash -# Get the connection service -kubectl get svc -n database | grep my-omni-cluster - -# Port-forward for local access -kubectl port-forward svc/my-omni-cluster-rw -n database 5432:5432 - -# Connect -PGPASSWORD=mysecretpassword psql -h localhost -U postgres -``` - -### Operator Lifecycle Management - -```bash -# Scale read replicas -kubectl patch dbcluster my-omni-cluster -n database \ - --type=merge -p '{"spec":{"readPoolSpec":{"replicas":3}}}' - -# Update parameters (rolling restart) -kubectl patch dbcluster my-omni-cluster -n database \ - --type=merge -p '{"spec":{"primarySpec":{"parameters":{"max_connections":"300"}}}}' - -# Check operator logs -kubectl logs -n alloydb-omni-system deployment/alloydb-omni-operator -f - -# Backup (if backup configuration is set) -kubectl annotate dbcluster my-omni-cluster -n database \ - alloydbomni.dbadmin.goog/backup=true - -# Delete cluster (data persists in PVC) -kubectl delete dbcluster my-omni-cluster -n database -``` - -### High Availability with Operator - -```yaml -# HA configuration with automatic failover -apiVersion: alloydbomni.dbadmin.goog/v1 -kind: DBCluster -metadata: - name: ha-cluster -spec: - databaseVersion: "15" - primarySpec: - adminUser: - passwordRef: - name: db-password-secret - resources: - cpu: "4" - memory: "16Gi" - availabilityOptions: - livenessProbe: Enabled - standby: Enabled - persistence: - size: 200Gi - storageClass: standard-rwo - readPoolSpec: - replicas: 2 - resources: - cpu: "2" - memory: "8Gi" -``` - -## Local Development Workflow - -```bash -# Quick start for local dev -docker run -d \ - --name alloydb-dev \ - -e POSTGRES_PASSWORD=dev \ - -e POSTGRES_DB=myapp_dev \ - -p 
5432:5432 \ - -v alloydb-dev-data:/var/lib/postgresql/data \ - google/alloydbomni:latest - -# Load initial schema -psql -h localhost -U postgres -d myapp_dev -f schema.sql - -# Run application tests -DATABASE_URL="postgresql://postgres:dev@localhost:5432/myapp_dev" make test -``` - -### Initialization Scripts - -Place `.sql` or `.sh` files in `/docker-entrypoint-initdb.d/` to run on first start: - -```sql --- init-scripts/01-extensions.sql -CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS pg_trgm; -CREATE EXTENSION IF NOT EXISTS google_columnar_engine; - --- init-scripts/02-schema.sql -CREATE TABLE users ( - id SERIAL PRIMARY KEY, - email TEXT UNIQUE NOT NULL, - name TEXT NOT NULL, - embedding vector(768), - created_at TIMESTAMPTZ DEFAULT NOW() -); -``` - -## Upgrading - -```bash -# Pull the latest image -docker pull google/alloydbomni:latest - -# Stop and remove old container (data is in the volume) -docker stop alloydb-omni && docker rm alloydb-omni - -# Start with new image -docker run -d \ - --name alloydb-omni \ - -e POSTGRES_PASSWORD=mysecretpassword \ - -p 5432:5432 \ - -v alloydb-data:/var/lib/postgresql/data \ - google/alloydbomni:latest -``` diff --git a/plugins/flow/skills/alloydb/SKILL.md b/plugins/flow/skills/alloydb/SKILL.md deleted file mode 100644 index 931a16f..0000000 --- a/plugins/flow/skills/alloydb/SKILL.md +++ /dev/null @@ -1,374 +0,0 @@ ---- -name: alloydb -description: "Use when provisioning Google AlloyDB, configuring clusters or read pools, enabling columnar engine, setting up Private Service Access, tuning managed PostgreSQL on GCP, or migrating from Cloud SQL to AlloyDB." ---- - -# AlloyDB - -## Overview - -AlloyDB is a fully managed, PostgreSQL-compatible database service on Google Cloud. It combines the familiarity of PostgreSQL with Google's storage and compute innovations for high performance and availability. - -## Operating Layers - -Use this skill in three distinct layers: - -1. 
**Provision** the managed database on GCP. -2. **Connect** an agent or client to the database. -3. **Operate** the database with tuning, observability, backups, and failover guidance. - -Keep those layers separate when giving guidance. Provisioning is not the same thing as agent connectivity. - -## Quick Reference - -### AlloyDB vs Standard PostgreSQL - -| Feature | AlloyDB | Cloud SQL for PostgreSQL | -|---|---|---| -| Storage | Disaggregated, log-based | Attached disk | -| Columnar engine | Built-in adaptive columnar cache | Not available | -| ML embeddings | Native Vertex AI integration | Manual setup | -| Read scaling | Read pool (auto-managed replicas) | Manual read replicas | -| Availability | 99.99% SLA (regional) | 99.95% SLA | -| Networking | Private IP only (PSA required) | Public or private IP | - -### Key Commands - -| Action | Command | -|---|---| -| Create cluster | `gcloud alloydb clusters create NAME --region=REGION --network=NETWORK --password=PASS` | -| Create primary | `gcloud alloydb instances create NAME --cluster=CLUSTER --region=REGION --instance-type=PRIMARY --cpu-count=N` | -| Create read pool | `gcloud alloydb instances create NAME --cluster=CLUSTER --region=REGION --instance-type=READ_POOL --read-pool-node-count=N` | -| Connect via proxy | `./alloydb-auth-proxy "projects/P/locations/R/clusters/C/instances/I" --port=5432` | -| Enable columnar engine | `SELECT google_columnar_engine_add('table_name');` | - -### Connection Pattern - -```bash -# From GCE VM in same VPC (private IP) -psql "host=ALLOYDB_IP dbname=postgres user=postgres sslmode=require" - -# Via AlloyDB Auth Proxy (recommended for external access) -./alloydb-auth-proxy \ - "projects/PROJECT/locations/REGION/clusters/CLUSTER/instances/INSTANCE" \ - --port=5432 - -psql "host=127.0.0.1 port=5432 dbname=postgres user=postgres" -``` - - - -## Workflow - -### Step 1: Set Up Private Service Access - -AlloyDB requires Private Service Access (PSA) before any cluster can be created. 
Allocate an IP range and create the VPC peering connection. - -### Step 2: Create Cluster and Primary Instance - -Create the cluster with `gcloud alloydb clusters create`, then add a primary instance. Choose CPU count based on workload (start with 4 vCPUs for small workloads). - -### Step 3: Configure Read Pool (if needed) - -For read-heavy workloads, add a read pool with `--instance-type=READ_POOL`. AlloyDB automatically manages the read replicas within the pool. - -### Step 4: Enable Columnar Engine (for analytics) - -For analytical query patterns, enable the columnar engine on tables with `SELECT google_columnar_engine_add('table')`. Check `g_columnar_recommended_columns` for automatic recommendations. - -### Step 5: Connect Applications - -Use the AlloyDB Auth Proxy for connections from outside the VPC. For applications within GCE/GKE on the same VPC, connect directly via private IP. - - - -## Host Integration Order - -Use the lowest-admin supported path for the current host, and degrade cleanly: - -1. **Gemini CLI**: use the dedicated `alloydb` and `alloydb-observability` extensions. -2. **Other agents with MCP support**: use MCP Toolbox with the official AlloyDB prebuilt config. -3. **No extension / no MCP**: fall back to `gcloud`, Auth Proxy, `psql`, and SQL guidance from this skill's references. - -Do not make the skill Gemini-only. The Gemini extension path is preferred when available, but the operational guidance in this skill must still work for Claude, Codex, OpenCode, Antigravity, and plain terminal workflows. 
- - - ## Guardrails - - **Always use Private Service Access** — treat AlloyDB as private-IP only and do not enable public IP; PSA must be configured before cluster creation - **Use the AlloyDB Auth Proxy** for connections outside the VPC — never expose the private IP directly - **Columnar engine is for analytics only** — do not enable on tables with heavy OLTP write patterns; it adds overhead to writes - **Size read pools based on read traffic** — do not use read pools as a substitute for query optimization - **Set `--password` at cluster creation** — there is no way to recover the initial password; store it in Secret Manager - **Always specify `sslmode=require`** in connection strings for security - **Enable Cloud Monitoring** — configure `roles/monitoring.viewer` and set alerts on CPU, connections, and replication lag before going to production - **Run EXPLAIN ANALYZE before promoting queries** — always validate query plans with `EXPLAIN (ANALYZE, BUFFERS)` on a representative dataset before production deployment - **Rotate credentials periodically** — store passwords in Secret Manager and rotate on a schedule; use exponential backoff during rolling restarts - - - - - ### Validation Checkpoint - - Before delivering configurations, verify: - - - [ ] Private Service Access is configured (IP allocation + VPC peering) - - [ ] Cluster uses a VPC network, not the `default` network in production - - [ ] Primary instance has appropriate CPU count for the workload - - [ ] Connection pattern uses Auth Proxy or private IP (no public exposure) - - [ ] Passwords are sourced from Secret Manager, not hardcoded - - - - - ## Example - - Columnar engine setup for an analytics workload: - - ```sql --- Enable columnar engine on the orders table -SELECT google_columnar_engine_add('orders'); - --- Check which columns AlloyDB recommends for columnar caching -SELECT table_name, column_name, estimated_benefit -FROM g_columnar_recommended_columns -ORDER BY estimated_benefit DESC; - --- Verify a query uses the
columnar engine -EXPLAIN (ANALYZE) SELECT region, SUM(amount) -FROM orders -WHERE order_date >= '2025-01-01' -GROUP BY region; --- Look for "Columnar Scan" in the plan output -``` - -Connection string for a Python application using the Auth Proxy: - -```python -DATABASE_URL = "postgresql+asyncpg://postgres:password@127.0.0.1:5432/mydb" -# Auth Proxy runs locally, forwarding to AlloyDB private IP -``` - - - ---- - -## Observability - -AlloyDB metrics are available under `alloydb.googleapis.com/database/postgresql/*` in Cloud Monitoring. Enable Cloud Monitoring before production launch. - -**Key metrics to watch:** - -- CPU utilization — alert above 80% sustained -- Active connections — alert above 80% of `max_connections` (200 on pg18) -- Replication lag on read pool nodes — alert above 30 seconds -- Dead tuple count — high values indicate autovacuum falling behind - -**PromQL patterns** (Cloud Monitoring / Google Managed Prometheus): - -```promql -# CPU utilization -avg_over_time(alloydb_googleapis_com:database_postgresql_cpu_utilization[5m]) - -# Active connections vs capacity -alloydb_googleapis_com:database_postgresql_network_connections - -# Replication lag -max by (instance_id)(alloydb_googleapis_com:database_postgresql_replication_replica_lag_seconds) -``` - -Required role: `roles/monitoring.viewer`. See [Observability Reference](references/observability.md) for full PromQL patterns, alert policy examples, and dashboard recommendations. - ---- - -## Data Plane Operations - -Before promoting any query to production, validate with `EXPLAIN (ANALYZE, BUFFERS)`. Monitor live workload via `pg_stat_activity`. 
- -**Quick patterns:** - -```sql --- Active queries with duration -SELECT pid, now() - query_start AS duration, state, query -FROM pg_stat_activity -WHERE state != 'idle' -ORDER BY duration DESC; - --- Tables with bloat -SELECT relname, n_dead_tup, n_live_tup, last_autovacuum -FROM pg_stat_user_tables -WHERE n_dead_tup > 10000 -ORDER BY n_dead_tup DESC; -``` - -See [Operations Reference](references/operations.md) for EXPLAIN ANALYZE interpretation, bloat detection, autovacuum tuning, invalid index detection, and security hardening. - ---- - -## Production Patterns - -### Auth Proxy Sidecar (Kubernetes) - -Run `alloydb-auth-proxy` as a sidecar container alongside the application pod. The sidecar uses the pod's workload identity (requires `roles/alloydb.client`) and refreshes IAM tokens automatically. The application connects to `127.0.0.1:5432` with no credential management in the app layer. - -### Credential Rotation - -Store the database password in Secret Manager. On rotation: add a new secret version, update the AlloyDB user password via `gcloud alloydb users set-password`, perform a rolling restart with exponential backoff, then disable the old secret version. - -### pg18 max_connections - -Set `max_connections=200` for PostgreSQL 18 instances as the production baseline. For workloads exceeding 200 concurrent connections, add PgBouncer in transaction mode rather than raising the limit further. - -See [Operations Reference](references/operations.md) for the full Kubernetes sidecar spec, rotation runbook, and connection pooling guidance. - ---- - -## Disaster Recovery - -### Point-in-Time Recovery (PITR) - -AlloyDB continuous backup enables PITR to any second within the retention window (default 14 days). Restoration creates a new cluster — the original cluster is unaffected. 
- -```bash -gcloud alloydb clusters restore RESTORED_CLUSTER_ID \ - --region=REGION \ - --network=NETWORK \ - --source-cluster=projects/PROJECT_ID/locations/REGION/clusters/CLUSTER_ID \ - --point-in-time="2025-06-15T14:30:00Z" -``` - -### Cross-Region Replica Promotion - -When the primary region is unavailable, promote the secondary cluster with: - -```bash -gcloud alloydb clusters promote SECONDARY_CLUSTER_ID --region=SECONDARY_REGION -``` - -After promotion, update connection strings (via Secret Manager or environment config) to the promoted cluster endpoint. See [Operations Reference](references/operations.md) for the full failover runbook and checklist. - ---- - -## Gemini CLI and MCP Toolbox - -This section is for the **connection layer**, not for provisioning the AlloyDB cluster itself. - -Prefer the dedicated Gemini CLI extensions for managed AlloyDB. They embed the underlying MCP Toolbox flow directly, so Gemini users do not need to configure a separate MCP server first. - -Install the core AlloyDB extension: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb --auto-update -``` - -Install the observability extension when the user wants metrics, dashboards, alerts, or query-performance monitoring: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb-observability --auto-update -``` - -Prefer workspace-scoped configuration: - -```bash -gemini extensions config alloydb --scope workspace -``` - -Guide the user through configuration before starting Gemini: - -```bash -export ALLOYDB_POSTGRES_PROJECT="" -export ALLOYDB_POSTGRES_REGION="" -export ALLOYDB_POSTGRES_CLUSTER="" -export ALLOYDB_POSTGRES_INSTANCE="" -export ALLOYDB_POSTGRES_DATABASE="" -export ALLOYDB_POSTGRES_USER="" # optional -export ALLOYDB_POSTGRES_PASSWORD="" # optional -export ALLOYDB_POSTGRES_IP_TYPE="PRIVATE" # PRIVATE / PUBLIC / PSC -``` - -Important configuration guidance: - -- Gemini CLI should be `v0.6.0` or newer. 
-- Application Default Credentials must be available before starting Gemini. -- For read-only discovery, require `roles/alloydb.viewer`. -- For SQL access, require `roles/alloydb.client`. -- For admin actions, require `roles/alloydb.admin` plus `roles/serviceusage.serviceUsageConsumer`. -- For observability, also require `roles/monitoring.viewer`. -- Prefer IAM-first auth. Password prompts are fallback-only. -- If the instance uses private IP, Gemini CLI must run in the same VPC network. -- The extension binds connection settings at session start; if the user needs a different instance or database, save/resume chat and restart Gemini with the new environment. -- Recent upstream changes removed broken keychain password behavior, so do not promise keychain-backed credential storage. - -For non-Gemini agents, or when the user explicitly wants a shared MCP server, guide them through MCP Toolbox with the AlloyDB prebuilt config instead of inventing a custom server: - -```json -{ - "mcpServers": { - "alloydb": { - "command": "./toolbox", - "args": ["--prebuilt", "alloydb-postgres", "--stdio"], - "env": { - "ALLOYDB_POSTGRES_PROJECT": "PROJECT_ID", - "ALLOYDB_POSTGRES_REGION": "REGION", - "ALLOYDB_POSTGRES_CLUSTER": "CLUSTER_NAME", - "ALLOYDB_POSTGRES_INSTANCE": "INSTANCE_NAME", - "ALLOYDB_POSTGRES_DATABASE": "DATABASE_NAME", - "ALLOYDB_POSTGRES_USER": "USERNAME", - "ALLOYDB_POSTGRES_PASSWORD": "PASSWORD", - "ALLOYDB_POSTGRES_IP_TYPE": "PRIVATE" - } - } - } -} -``` - -For reusable project workflows, prefer generated workspace skills over one-off prompts: - -```bash -toolbox --prebuilt alloydb-postgres skills-generate \ - --name alloydb-monitor \ - --toolset monitor \ - --description "AlloyDB monitoring skill" \ - --output-dir .agents/skills -``` - -If neither Gemini extensions nor MCP Toolbox are available, fall back to the manual `gcloud`, Auth Proxy, and SQL workflows already documented in this skill's reference files. 
- ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Cluster Setup](references/setup.md)** - - Cluster creation, instance types (primary, read pool), Private Service Access, gcloud commands. -- **[Features](references/features.md)** - - Columnar engine, adaptive caching, ML embeddings, vector search, pgvector integration. -- **[Migration](references/migration.md)** - - Migration from Cloud SQL or on-prem PostgreSQL, Database Migration Service patterns. -- **[Observability](references/observability.md)** - - PromQL patterns, Cloud Monitoring setup, key metrics, alert policies, dashboard recommendations, Query Insights. -- **[Operations](references/operations.md)** - - EXPLAIN ANALYZE, pg_stat_activity, bloat detection, autovacuum tuning, invalid indexes, security hardening, PITR, cross-region failover, credential rotation, Auth Proxy sidecar. -- **[Gemini + MCP Guidance](references/gemini-mcp.md)** - - Extension install, IAM prerequisites, env vars, private-IP constraints, and MCP Toolbox fallback config. - ---- - -## Official References - -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [PostgreSQL / psql](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/postgres_psql.md) -- [GCP Scripting](https://github.com/cofin/flow/blob/main/templates/styleguides/cloud/gcp_scripting.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/alloydb/agents/openai.yaml b/plugins/flow/skills/alloydb/agents/openai.yaml deleted file mode 100644 index fc7c826..0000000 --- a/plugins/flow/skills/alloydb/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "AlloyDB" - short_description: "Google AlloyDB clusters, read pools, migrations, and managed PostgreSQL patterns" diff --git a/plugins/flow/skills/alloydb/references/features.md b/plugins/flow/skills/alloydb/references/features.md deleted file mode 100644 index f947afe..0000000 --- a/plugins/flow/skills/alloydb/references/features.md +++ /dev/null @@ -1,131 +0,0 @@ -# AlloyDB Features - -## Columnar Engine - -The columnar engine automatically caches frequently accessed columns in a columnar format for analytical queries. - -```sql --- Enable columnar engine on a table -SELECT google_columnar_engine_add('orders'); - --- Check columnar engine status -SELECT * FROM g_columnar_recommended_columns; - --- Verify a query uses columnar engine -EXPLAIN (ANALYZE) SELECT region, SUM(amount) FROM orders GROUP BY region; --- Look for "Columnar Scan" in the plan -``` - -### Automatic Column Selection - -```sql --- AlloyDB automatically recommends columns based on workload --- View recommendations: -SELECT table_name, column_name, estimated_benefit -FROM g_columnar_recommended_columns -ORDER BY estimated_benefit DESC; -``` - -## Adaptive Caching - -AlloyDB uses an intelligent caching layer that learns access patterns. No manual configuration needed, but you can monitor it: - -```sql --- Cache hit ratio -SELECT - blks_hit::float / (blks_hit + blks_read) AS cache_hit_ratio -FROM pg_stat_database -WHERE datname = current_database(); -``` - -## ML Embeddings & Vector Search - -AlloyDB integrates with Vertex AI for generating embeddings directly in SQL. 
- -```sql --- Enable the extension -CREATE EXTENSION IF NOT EXISTS google_ml_integration; - --- Grant access -GRANT EXECUTE ON FUNCTION embedding TO app_user; - --- Generate embeddings using Vertex AI -SELECT embedding( - 'textembedding-gecko@003', - 'This is my document text' -) AS vector; - --- Store embeddings in a table -ALTER TABLE documents ADD COLUMN embedding vector(768); - -UPDATE documents SET embedding = embedding( - 'textembedding-gecko@003', - title || ' ' || content -); -``` - -## pgvector Integration - -```sql --- Install pgvector -CREATE EXTENSION IF NOT EXISTS vector; - --- Create table with vector column -CREATE TABLE items ( - id SERIAL PRIMARY KEY, - description TEXT, - embedding vector(768) -); - --- Create HNSW index (recommended for AlloyDB) -CREATE INDEX idx_items_embedding ON items - USING hnsw (embedding vector_cosine_ops) - WITH (m = 16, ef_construction = 200); - --- Similarity search -SELECT id, description, - 1 - (embedding <=> query_embedding) AS similarity -FROM items -ORDER BY embedding <=> query_embedding -LIMIT 10; - --- Hybrid search: combine vector similarity with filters -SELECT id, description -FROM items -WHERE category = 'electronics' -ORDER BY embedding <=> query_embedding -LIMIT 10; -``` - -## AlloyDB AI Predictions - -```sql --- Register a Vertex AI model endpoint -CALL google_ml.create_model( - model_id => 'my_model', - model_provider => 'google', - model_qualified_name => 'projects/my-project/locations/us-central1/endpoints/12345' -); - --- Use the model in SQL -SELECT id, google_ml.predict_row('my_model', json_build_object('features', features)) -FROM predict_table; -``` - -## Automated Backups & PITR - -```bash -# Backups are automatic (14-day retention by default) -# Configure continuous backup -gcloud alloydb clusters update my-cluster \ - --region=us-central1 \ - --continuous-backup-recovery-window-days=14 \ - --enable-continuous-backup - -# Point-in-time restore -gcloud alloydb clusters restore my-cluster-restored 
\ - --region=us-central1 \ - --network=default \ - --source-cluster=my-cluster \ - --point-in-time="2024-06-15T14:30:00Z" -``` diff --git a/plugins/flow/skills/alloydb/references/gemini-mcp.md b/plugins/flow/skills/alloydb/references/gemini-mcp.md deleted file mode 100644 index 8999963..0000000 --- a/plugins/flow/skills/alloydb/references/gemini-mcp.md +++ /dev/null @@ -1,93 +0,0 @@ -# Gemini CLI and MCP Toolbox - -## Agent Integration Order - -1. Gemini extension path -2. MCP Toolbox path for other agents or shared MCP setups -3. Manual `gcloud` + Auth Proxy + `psql` fallback - -This keeps the AlloyDB skill usable across hosts instead of coupling it to Gemini only. - -## Preferred Path for Gemini - -Use the dedicated Gemini CLI extensions for managed AlloyDB: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb --auto-update -gemini extensions install https://github.com/gemini-cli-extensions/alloydb-observability --auto-update -gemini extensions config alloydb --scope workspace -``` - -These extensions are backed by MCP Toolbox for Databases, but Gemini CLI users do not need to stand up a separate MCP server first. 
- - ## Required Prerequisites - - Gemini CLI `v0.6.0+` - Application Default Credentials - IAM roles based on the task: - - `roles/alloydb.viewer` for read-only discovery - - `roles/alloydb.client` for SQL access - - `roles/alloydb.admin` for admin operations - - `roles/serviceusage.serviceUsageConsumer` alongside `roles/alloydb.admin` for admin operations - - `roles/monitoring.viewer` for observability extension usage - - ## Environment Configuration - - ```bash -export ALLOYDB_POSTGRES_PROJECT="" -export ALLOYDB_POSTGRES_REGION="" -export ALLOYDB_POSTGRES_CLUSTER="" -export ALLOYDB_POSTGRES_INSTANCE="" -export ALLOYDB_POSTGRES_DATABASE="" -export ALLOYDB_POSTGRES_USER="" # optional -export ALLOYDB_POSTGRES_PASSWORD="" # optional -export ALLOYDB_POSTGRES_IP_TYPE="PRIVATE" # PRIVATE / PUBLIC / PSC -``` - - Notes: - - - Load these from a `.env` file when possible. - If using private IP, Gemini CLI must run from the same VPC. - Connection settings are fixed at session start, so switching instances/databases requires restarting Gemini. - Prefer IAM-first auth. Treat passwords as a fallback path only.
- -## MCP Toolbox Fallback - -For other LLMs or shared MCP setups, use the official prebuilt Toolbox config: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/mcp-toolbox --auto-update -``` - -```json -{ - "mcpServers": { - "alloydb": { - "command": "./toolbox", - "args": ["--prebuilt", "alloydb-postgres", "--stdio"], - "env": { - "ALLOYDB_POSTGRES_PROJECT": "PROJECT_ID", - "ALLOYDB_POSTGRES_REGION": "REGION", - "ALLOYDB_POSTGRES_CLUSTER": "CLUSTER_NAME", - "ALLOYDB_POSTGRES_INSTANCE": "INSTANCE_NAME", - "ALLOYDB_POSTGRES_DATABASE": "DATABASE_NAME", - "ALLOYDB_POSTGRES_USER": "USERNAME", - "ALLOYDB_POSTGRES_PASSWORD": "PASSWORD", - "ALLOYDB_POSTGRES_IP_TYPE": "PRIVATE" - } - } - } -} -``` - -For reusable workspace automation, generate project-local skills instead of rewriting the same prompts: - -```bash -toolbox --prebuilt alloydb-postgres skills-generate \ - --name alloydb-monitor \ - --toolset monitor \ - --description "AlloyDB monitoring skill" \ - --output-dir .agents/skills -``` - -If Toolbox is unavailable, fall back to the manual setup and operations references in this skill. diff --git a/plugins/flow/skills/alloydb/references/migration.md b/plugins/flow/skills/alloydb/references/migration.md deleted file mode 100644 index 2b741e9..0000000 --- a/plugins/flow/skills/alloydb/references/migration.md +++ /dev/null @@ -1,130 +0,0 @@ -# Migration to AlloyDB - -## From Cloud SQL for PostgreSQL - -### Using Database Migration Service (DMS) - -```bash -# 1. Create a connection profile for source Cloud SQL -gcloud database-migration connection-profiles create cloudsql-source \ - --region=us-central1 \ - --display-name="Cloud SQL Source" \ - --provider=CLOUDSQL \ - --cloudsql-instance=my-project:us-central1:my-cloudsql - -# 2. 
Create a connection profile for AlloyDB destination -gcloud database-migration connection-profiles create alloydb-dest \ - --region=us-central1 \ - --display-name="AlloyDB Dest" \ - --provider=ALLOYDB \ - --alloydb-cluster=my-alloydb-cluster - -# 3. Create migration job -gcloud database-migration migration-jobs create cloudsql-to-alloydb \ - --region=us-central1 \ - --type=CONTINUOUS \ - --source=cloudsql-source \ - --destination=alloydb-dest -``` - -### Using pg_dump/pg_restore - -```bash -# Export from Cloud SQL -gcloud sql export sql my-cloudsql-instance gs://my-bucket/export.sql \ - --database=mydb - -# Or use pg_dump directly (via Cloud SQL Auth Proxy) -pg_dump -Fc -j4 -d mydb -h 127.0.0.1 -U postgres -f mydb.dump - -# Import to AlloyDB -pg_restore -j4 -d mydb -h ALLOYDB_IP -U postgres mydb.dump -``` - -## From On-Premises PostgreSQL - -### Using DMS with Reverse SSH Tunnel - -```bash -# 1. Create source connection profile with SSH tunnel -gcloud database-migration connection-profiles create onprem-source \ - --region=us-central1 \ - --display-name="On-Prem PG" \ - --provider=POSTGRESQL \ - --host=10.0.0.50 \ - --port=5432 \ - --username=replicator \ - --password=secret \ - --forward-ssh-hostname=bastion.example.com \ - --forward-ssh-username=tunnel-user \ - --forward-ssh-private-key-file=~/.ssh/id_rsa - -# 2. 
Create migration job with continuous replication -gcloud database-migration migration-jobs create onprem-to-alloydb \ - --region=us-central1 \ - --type=CONTINUOUS \ - --source=onprem-source \ - --destination=alloydb-dest -``` - -### Prerequisites for Logical Replication - -On the source PostgreSQL server: - -```ini -# postgresql.conf -wal_level = logical -max_replication_slots = 10 -max_wal_senders = 10 -``` - -```sql --- Create replication user -CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD 'secret'; -GRANT SELECT ON ALL TABLES IN SCHEMA public TO replicator; - --- Create publication -CREATE PUBLICATION dms_pub FOR ALL TABLES; -``` - -## Migration Checklist - -1. **Pre-migration** - - Inventory extensions (`SELECT * FROM pg_extension`) - - Verify extension compatibility with AlloyDB - - Check for unsupported features (e.g., certain procedural languages) - - Benchmark current workload for comparison - -2. **Schema migration** - - Export schema: `pg_dump --schema-only -d mydb -f schema.sql` - - Review and adjust for AlloyDB (e.g., enable columnar engine on analytics tables) - -3. **Data migration** - - Use DMS for minimal downtime (continuous replication) - - Or pg_dump/pg_restore for simpler one-time migration - -4. **Validation** - - Row count comparison - - Checksum verification on critical tables - - Run application test suite against AlloyDB - - Performance benchmarking (pgbench, custom workloads) - -5. 
**Cutover** - - Stop writes to source - - Wait for replication lag to reach zero - - Promote AlloyDB as primary - - Update application connection strings - - Monitor for errors - -## Extension Compatibility - -AlloyDB supports most common PostgreSQL extensions: - -```sql --- Check available extensions -SELECT * FROM pg_available_extensions ORDER BY name; - --- Commonly used extensions supported by AlloyDB: --- pgvector, pg_trgm, hstore, uuid-ossp, pg_stat_statements, --- postgis, pgcrypto, pg_cron, google_ml_integration -``` diff --git a/plugins/flow/skills/alloydb/references/observability.md b/plugins/flow/skills/alloydb/references/observability.md deleted file mode 100644 index a112d65..0000000 --- a/plugins/flow/skills/alloydb/references/observability.md +++ /dev/null @@ -1,237 +0,0 @@ -# AlloyDB Observability - -## Cloud Monitoring Setup - -### Enable Required APIs and Roles - -```bash -# Enable Cloud Monitoring API -gcloud services enable monitoring.googleapis.com - -# Grant viewer role to the service account -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:SA_EMAIL" \ - --role="roles/monitoring.viewer" -``` - -Minimum required role: `roles/monitoring.viewer` - -### AlloyDB Metric Namespace - -All AlloyDB metrics live under `alloydb.googleapis.com/database/`. Use the Cloud Monitoring API or Metrics Explorer to query them. 
- -```bash -# List all AlloyDB metrics -gcloud monitoring metrics-descriptors list \ - --filter="metric.type=starts_with(\"alloydb.googleapis.com/database/\")" -``` - ---- - -## Key Metrics - -| Metric | Description | Alert Threshold | -|---|---|---| -| `alloydb.googleapis.com/database/postgresql/cpu/utilization` | CPU utilization (0–1.0) | > 0.80 sustained | -| `alloydb.googleapis.com/database/postgresql/memory/usage` | Memory bytes used | > 85% of available | -| `alloydb.googleapis.com/database/postgresql/disk/read_ops_count` | Disk read IOPS | Baseline + 3σ | -| `alloydb.googleapis.com/database/postgresql/disk/write_ops_count` | Disk write IOPS | Baseline + 3σ | -| `alloydb.googleapis.com/database/postgresql/network/connections` | Active connections | > 80% of max_connections | -| `alloydb.googleapis.com/database/postgresql/insights/aggregate/execution_time` | Cumulative query execution time | Trend alert | -| `alloydb.googleapis.com/database/postgresql/insights/aggregate/io_time` | Cumulative I/O wait time | > 20% of execution time | -| `alloydb.googleapis.com/database/postgresql/replication/replica_lag_seconds` | Replication lag on read pool nodes | > 30 seconds | -| `alloydb.googleapis.com/database/postgresql/vacuum/dead_tuple_count` | Dead tuples awaiting vacuum | > 1,000,000 per table | - ---- - -## PromQL Patterns - -These patterns are for use with Cloud Monitoring's Prometheus-compatible query interface or Managed Prometheus (GMP). 
- -### CPU Utilization (averaged over 5 minutes) - -```promql -avg_over_time( - alloydb_googleapis_com:database_postgresql_cpu_utilization{ - project_id="PROJECT_ID", - cluster_id="CLUSTER_ID" - }[5m] -) -``` - -### Memory Usage Percentage - -```promql -( - alloydb_googleapis_com:database_postgresql_memory_usage - / - alloydb_googleapis_com:database_postgresql_memory_total_size -) * 100 -``` - -### Active Connections vs Max Connections - -```promql -alloydb_googleapis_com:database_postgresql_network_connections{ - project_id="PROJECT_ID", - instance_id="INSTANCE_ID" -} -``` - -Compare this value against the configured `max_connections` (default 200 on pg18 instances). - -### Replication Lag (read pool nodes) - -```promql -max by (instance_id) ( - alloydb_googleapis_com:database_postgresql_replication_replica_lag_seconds{ - project_id="PROJECT_ID", - cluster_id="CLUSTER_ID" - } -) -``` - -### Query Execution Time Rate (per second) - -```promql -rate( - alloydb_googleapis_com:database_postgresql_insights_aggregate_execution_time{ - project_id="PROJECT_ID" - }[5m] -) -``` - -### I/O Wait Ratio - -```promql -rate( - alloydb_googleapis_com:database_postgresql_insights_aggregate_io_time[5m] -) -/ -rate( - alloydb_googleapis_com:database_postgresql_insights_aggregate_execution_time[5m] -) -``` - -### Disk Read IOPS Rate - -```promql -rate( - alloydb_googleapis_com:database_postgresql_disk_read_ops_count{ - project_id="PROJECT_ID", - instance_id="INSTANCE_ID" - }[1m] -) -``` - ---- - -## Alert Policy Examples - -### High CPU Alert (Cloud Monitoring JSON) - -```json -{ - "displayName": "AlloyDB High CPU", - "conditions": [ - { - "displayName": "CPU utilization > 80%", - "conditionThreshold": { - "filter": "metric.type=\"alloydb.googleapis.com/database/postgresql/cpu/utilization\" AND resource.type=\"alloydb.googleapis.com/Instance\"", - "comparison": "COMPARISON_GT", - "thresholdValue": 0.8, - "duration": "300s", - "aggregations": [ - { - "alignmentPeriod": "60s", - 
"perSeriesAligner": "ALIGN_MEAN" - } - ] - } - } - ], - "alertStrategy": { - "autoClose": "1800s" - } -} -``` - -### High Connection Count Alert - -```bash -gcloud monitoring policies create \ - --notification-channels=CHANNEL_ID \ - --display-name="AlloyDB High Connections" \ - --condition-display-name="Connections > 160 (80% of 200)" \ - --condition-filter='metric.type="alloydb.googleapis.com/database/postgresql/network/connections" resource.type="alloydb.googleapis.com/Instance"' \ - --condition-threshold-value=160 \ - --condition-threshold-duration=120s \ - --condition-comparison=COMPARISON_GT -``` - -### Replication Lag Alert - -```bash -gcloud monitoring policies create \ - --notification-channels=CHANNEL_ID \ - --display-name="AlloyDB Replication Lag" \ - --condition-display-name="Read pool lag > 30s" \ - --condition-filter='metric.type="alloydb.googleapis.com/database/postgresql/replication/replica_lag_seconds" resource.type="alloydb.googleapis.com/Instance"' \ - --condition-threshold-value=30 \ - --condition-threshold-duration=60s \ - --condition-comparison=COMPARISON_GT -``` - ---- - -## Dashboard Recommendations - -### Recommended Dashboard Panels - -1. **CPU utilization** — line chart, 1h window, all instances overlaid -2. **Memory usage %** — line chart, alert band at 85% -3. **Active connections** — line chart with max_connections reference line -4. **Read IOPS / Write IOPS** — stacked area chart -5. **Replication lag** — line chart per read pool node -6. **Query execution time rate** — line chart, top 5 queries by execution time -7. **Dead tuple count** — bar chart per table, alert at 1M - -### Create a Dashboard via gcloud - -```bash -gcloud monitoring dashboards create \ - --config-from-file=alloydb-dashboard.json -``` - -Use the Cloud Console Metrics Explorer to build panels interactively, then export the JSON for version control. 
- ---- - -## Query Insights (Built-in) - -AlloyDB Query Insights provides in-console query performance data without additional setup. - -```bash -# Enable Query Insights on an instance -gcloud alloydb instances update INSTANCE_ID \ - --cluster=CLUSTER_ID \ - --region=REGION \ - --query-insights-enabled \ - --query-string-length=1024 \ - --record-application-tags \ - --record-client-address -``` - -Access via: **Cloud Console → AlloyDB → Cluster → Query Insights** - ---- - -## Gemini CLI Extension - -For PromQL metric lookup and observability automation: - -```bash -gemini extensions install https://github.com/gemini-cli-extensions/alloydb-observability -``` - -Provides PromQL metric templates, alert policy generators, and dashboard scaffolding for AlloyDB metrics. diff --git a/plugins/flow/skills/alloydb/references/operations.md b/plugins/flow/skills/alloydb/references/operations.md deleted file mode 100644 index e25a8fa..0000000 --- a/plugins/flow/skills/alloydb/references/operations.md +++ /dev/null @@ -1,429 +0,0 @@ -# AlloyDB Operations - -## Query Analysis with EXPLAIN ANALYZE - -Always run `EXPLAIN (ANALYZE, BUFFERS)` before promoting queries to production. The `BUFFERS` flag exposes cache hit/miss data, revealing I/O bottlenecks. 
- -```sql --- Full analysis with buffer stats -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) -SELECT * -FROM orders o -JOIN customers c ON o.customer_id = c.id -WHERE o.created_at >= NOW() - INTERVAL '7 days'; -``` - -### Reading the Output - -| Field | What to look for | -|---|---| -| `Seq Scan` | Missing index — consider adding one if rows filtered > 10% | -| `Rows Removed by Filter` | High values indicate poor selectivity or missing partial index | -| `Buffers: shared hit=N read=M` | High `read` relative to `hit` indicates cache misses | -| `Columnar Scan` | Confirms the AlloyDB columnar engine is being used | -| `rows=N` (estimated vs. actual) | A large divergence between estimated and actual row counts signals stale statistics | -| `Planning Time` | > 100ms suggests complex join graphs; consider `pg_hint_plan` | - -### Force Fresh Statistics - -```sql --- Update statistics on a specific table -ANALYZE orders; - --- Update statistics with verbose output -ANALYZE VERBOSE orders; -``` - ---- - -## Active Query Monitoring (pg_stat_activity) - -```sql --- All active queries with duration -SELECT - pid, - now() - pg_stat_activity.query_start AS duration, - query, - state, - wait_event_type, - wait_event, - client_addr -FROM pg_stat_activity -WHERE state != 'idle' -ORDER BY duration DESC; -``` - -### Identify Long-Running Queries - -```sql --- Queries running longer than 5 minutes -SELECT - pid, - now() - query_start AS duration, - usename, - application_name, - state, - query -FROM pg_stat_activity -WHERE state = 'active' - AND query_start < NOW() - INTERVAL '5 minutes' -ORDER BY duration DESC; -``` - -### Kill a Blocking Query - -```sql --- Graceful cancel (sends SIGINT) -SELECT pg_cancel_backend(pid); - --- Forceful termination (sends SIGTERM) -SELECT pg_terminate_backend(pid); -``` - -### Lock Monitoring - -```sql --- Find blocking lock chains -SELECT - blocked.pid AS blocked_pid, - blocked.query AS blocked_query, - blocking.pid AS blocking_pid, - blocking.query AS blocking_query -FROM 
pg_stat_activity blocked -JOIN pg_stat_activity blocking - ON blocking.pid = ANY(pg_blocking_pids(blocked.pid)) -WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0; -``` - ---- - -## Bloat Detection - -### Table Bloat (pg_stat_user_tables) - -```sql --- Tables with high dead tuple counts -SELECT - schemaname, - relname AS table_name, - n_dead_tup AS dead_tuples, - n_live_tup AS live_tuples, - ROUND(n_dead_tup::numeric / NULLIF(n_live_tup + n_dead_tup, 0) * 100, 2) AS dead_ratio_pct, - last_autovacuum, - last_autoanalyze -FROM pg_stat_user_tables -WHERE n_dead_tup > 10000 -ORDER BY dead_ratio_pct DESC; -``` - -### Index Bloat Estimate - -```sql --- Rough index bloat estimate using pg_class -SELECT - t.relname AS table_name, - i.relname AS index_name, - pg_size_pretty(pg_relation_size(i.oid)) AS index_size, - idx_scan AS scans_since_reset -FROM pg_index x -JOIN pg_class t ON t.oid = x.indrelid -JOIN pg_class i ON i.oid = x.indexrelid -ORDER BY pg_relation_size(i.oid) DESC -LIMIT 20; -``` - -### Manual VACUUM on Bloated Tables - -```sql --- Reclaim space without locking (VACUUM) or with full rewrite (VACUUM FULL — locks table) -VACUUM (VERBOSE, ANALYZE) orders; - --- VACUUM FULL: use only during maintenance windows -VACUUM FULL orders; -``` - ---- - -## Autovacuum Tuning - -AlloyDB runs autovacuum automatically. For high-write tables, tighten the thresholds. 
- -```sql --- Per-table autovacuum override (high-write table example) -ALTER TABLE orders SET ( - autovacuum_vacuum_scale_factor = 0.01, -- trigger at 1% dead tuples (default: 0.2) - autovacuum_analyze_scale_factor = 0.005, -- trigger analyze at 0.5% (default: 0.1) - autovacuum_vacuum_cost_delay = 2 -- ms (reduce to run faster) -); -``` - -### Monitor Autovacuum Activity - -```sql --- Currently running autovacuum workers -SELECT - pid, - now() - query_start AS duration, - query -FROM pg_stat_activity -WHERE query LIKE 'autovacuum:%' -ORDER BY duration DESC; -``` - ---- - -## Invalid Index Detection - -Indexes can become invalid (e.g., after a failed `CREATE INDEX CONCURRENTLY`). Invalid indexes waste space and are never used. - -```sql --- Find invalid indexes -SELECT - schemaname, - tablename, - indexname, - pg_size_pretty(pg_relation_size(indexrelid)) AS index_size -FROM pg_stat_user_indexes -JOIN pg_index ON pg_index.indexrelid = pg_stat_user_indexes.indexrelid -WHERE NOT pg_index.indisvalid -ORDER BY pg_relation_size(indexrelid) DESC; -``` - -```sql --- Drop an invalid index and rebuild it -DROP INDEX CONCURRENTLY idx_orders_customer_id; -CREATE INDEX CONCURRENTLY idx_orders_customer_id ON orders (customer_id); -``` - ---- - -## Security Hardening - -### IAM Database Authentication - -```bash -# Enable IAM database authentication on the cluster -gcloud alloydb instances update INSTANCE_ID \ - --cluster=CLUSTER_ID \ - --region=REGION \ - --database-flags=alloydb.iam_authentication=on - -# Grant a GCP user IAM DB access -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="user:user@example.com" \ - --role="roles/alloydb.databaseUser" -``` - -```sql --- Create the IAM-authenticated database user (one-time) -CREATE USER "user@example.com" WITH LOGIN; -GRANT CONNECT ON DATABASE mydb TO "user@example.com"; -``` - -### SSL Enforcement - -```sql --- Verify SSL is required (AlloyDB enforces SSL by default) -SHOW ssl; - --- Confirm your connection is using 
SSL -SELECT ssl, version, cipher, bits FROM pg_stat_ssl WHERE pid = pg_backend_pid(); -``` - -Always include `sslmode=require` (or `sslmode=verify-full` for certificate validation) in connection strings. - -### Audit Logging - -```bash -# Enable data access audit logs for AlloyDB -gcloud projects get-iam-policy PROJECT_ID --format=json > policy.json -# Add auditLogConfigs for alloydb.googleapis.com with DATA_READ and DATA_WRITE -gcloud projects set-iam-policy PROJECT_ID policy.json -``` - -```json -{ - "auditConfigs": [ - { - "service": "alloydb.googleapis.com", - "auditLogConfigs": [ - { "logType": "DATA_READ" }, - { "logType": "DATA_WRITE" }, - { "logType": "ADMIN_READ" } - ] - } - ] -} -``` - ---- - -## Credential Rotation - -### Rotation Procedure - -1. Generate a new password and store it in Secret Manager as a new version. -2. Update AlloyDB to accept the new password. -3. Perform a rolling restart of application workloads to pick up the new credential. -4. Disable the old Secret Manager version after rollout is confirmed. - -```bash -# Step 1: Create new secret version -echo -n "new-secure-password" | gcloud secrets versions add DB_PASSWORD_SECRET --data-file=- - -# Step 2: Update AlloyDB cluster password -gcloud alloydb users set-password postgres \ - --cluster=CLUSTER_ID \ - --region=REGION \ - --password="new-secure-password" - -# Step 3: Disable old version (after workloads rotated) -gcloud secrets versions disable OLD_VERSION_ID --secret=DB_PASSWORD_SECRET -``` - -### Retry with Exponential Backoff (Python Example) - -Use exponential backoff when reconnecting after a credential rotation to avoid thundering-herd connection storms. 
- -```python -import time -import random -import psycopg2 -from google.cloud import secretmanager - -def get_db_password(secret_name: str) -> str: - client = secretmanager.SecretManagerServiceClient() - response = client.access_secret_version(name=f"{secret_name}/versions/latest") - return response.payload.data.decode("utf-8") - -def connect_with_retry(dsn_template: str, secret_name: str, max_attempts: int = 5): - delay = 1.0 - for attempt in range(1, max_attempts + 1): - try: - password = get_db_password(secret_name) - conn = psycopg2.connect(dsn_template.format(password=password)) - return conn - except psycopg2.OperationalError as e: - if attempt == max_attempts: - raise - jitter = random.uniform(0, delay * 0.2) - time.sleep(delay + jitter) - delay = min(delay * 2, 60.0) # cap at 60 seconds -``` - ---- - -## Disaster Recovery - -### Point-in-Time Recovery (PITR) - -AlloyDB continuous backup enables PITR to any second within the retention window (default 14 days). - -```bash -# View available backup window -gcloud alloydb clusters describe CLUSTER_ID \ - --region=REGION \ - --format="value(continuousBackupInfo)" - -# Restore to a specific point in time (creates a new cluster) -gcloud alloydb clusters restore RESTORED_CLUSTER_ID \ - --region=REGION \ - --network=projects/PROJECT_ID/global/networks/NETWORK \ - --source-cluster=projects/PROJECT_ID/locations/REGION/clusters/CLUSTER_ID \ - --point-in-time="2025-06-15T14:30:00Z" - -# Create primary instance on the restored cluster -gcloud alloydb instances create restored-primary \ - --cluster=RESTORED_CLUSTER_ID \ - --region=REGION \ - --instance-type=PRIMARY \ - --cpu-count=4 -``` - -### Cross-Region Replica Promotion (Failover Runbook) - -Use when the primary region is unavailable and the secondary cluster must be promoted. 
- -```bash -# Step 1: Promote the secondary cluster to standalone -gcloud alloydb clusters promote SECONDARY_CLUSTER_ID \ - --region=SECONDARY_REGION - -# Step 2: Verify the promoted cluster is accepting writes -gcloud alloydb instances describe SECONDARY_INSTANCE_ID \ - --cluster=SECONDARY_CLUSTER_ID \ - --region=SECONDARY_REGION \ - --format="value(state)" -# Expected: READY - -# Step 3: Update application connection strings to point to the promoted cluster -# (update Secret Manager or environment config — no hardcoded IPs) - -# Step 4: After primary region recovery, re-establish replication -gcloud alloydb clusters create PRIMARY_CLUSTER_ID \ - --region=PRIMARY_REGION \ - --network=NETWORK \ - --primary-cluster=projects/PROJECT_ID/locations/SECONDARY_REGION/clusters/SECONDARY_CLUSTER_ID -``` - -#### Failover Checklist - -- [ ] Confirm primary region is truly unavailable (not a transient network blip) -- [ ] Check replication lag on secondary before promotion (aim for < 30s at time of failure) -- [ ] Notify stakeholders before promoting -- [ ] Update DNS / load balancer to point to promoted cluster endpoint -- [ ] Re-enable read pool on promoted cluster if needed -- [ ] Document RTO/RPO actuals for post-incident review - ---- - -## Auth Proxy Sidecar Pattern (Kubernetes) - -Run the AlloyDB Auth Proxy as a sidecar container to avoid exposing credentials in application pods. 
- -```yaml -# Kubernetes pod spec excerpt -spec: - serviceAccountName: workload-sa # must have roles/alloydb.client - containers: - - name: app - image: my-app:latest - env: - - name: DATABASE_URL - value: "postgresql://postgres@127.0.0.1:5432/mydb?sslmode=disable" - # SSL handled by proxy; disable in app connection string - - - name: alloydb-auth-proxy - image: gcr.io/cloud-sql-connectors/alloydb-auth-proxy:latest - args: - - "projects/PROJECT_ID/locations/REGION/clusters/CLUSTER_ID/instances/INSTANCE_ID" - - "--port=5432" - - "--structured-logs" - resources: - requests: - memory: "32Mi" - cpu: "10m" - limits: - memory: "128Mi" - cpu: "500m" - securityContext: - runAsNonRoot: true - allowPrivilegeEscalation: false -``` - -The sidecar handles IAM token refresh automatically. The application connects to `127.0.0.1:5432` with no additional auth config. - ---- - -## pg18 Production Defaults - -When running AlloyDB with PostgreSQL 18 engine, set `max_connections=200` as the baseline. Pair with a connection pooler (PgBouncer or built-in AlloyDB connection management) to handle burst traffic without exhausting connections. - -```bash -gcloud alloydb instances update INSTANCE_ID \ - --cluster=CLUSTER_ID \ - --region=REGION \ - --database-flags=max_connections=200 -``` - -For workloads exceeding 200 concurrent connections, deploy PgBouncer in transaction mode in front of the AlloyDB instance rather than raising `max_connections` further, which increases shared memory overhead. 
diff --git a/plugins/flow/skills/alloydb/references/setup.md b/plugins/flow/skills/alloydb/references/setup.md deleted file mode 100644 index 9036799..0000000 --- a/plugins/flow/skills/alloydb/references/setup.md +++ /dev/null @@ -1,116 +0,0 @@ -# AlloyDB Cluster Setup - -## Prerequisites - -```bash -# Enable the AlloyDB API -gcloud services enable alloydb.googleapis.com - -# Enable Service Networking (for Private Service Access) -gcloud services enable servicenetworking.googleapis.com -``` - -## Private Service Access (Required) - -AlloyDB instances are only accessible via private IP through VPC peering. - -```bash -# Create an IP allocation for Google services -gcloud compute addresses create alloydb-range \ - --global \ - --purpose=VPC_PEERING \ - --prefix-length=16 \ - --network=default - -# Create the private connection -gcloud services vpc-peerings connect \ - --service=servicenetworking.googleapis.com \ - --ranges=alloydb-range \ - --network=default -``` - -## Create a Cluster - -```bash -# Create cluster -gcloud alloydb clusters create my-cluster \ - --region=us-central1 \ - --network=default \ - --password=my-secure-password - -# Create primary instance -gcloud alloydb instances create my-primary \ - --cluster=my-cluster \ - --region=us-central1 \ - --instance-type=PRIMARY \ - --cpu-count=4 \ - --database-flags=max_connections=500 -``` - -## Read Pool Instances - -```bash -# Create read pool (auto-scales read replicas) -gcloud alloydb instances create my-read-pool \ - --cluster=my-cluster \ - --region=us-central1 \ - --instance-type=READ_POOL \ - --cpu-count=4 \ - --read-pool-node-count=2 -``` - -## Cross-Region Replication - -```bash -# Create secondary cluster in another region -gcloud alloydb clusters create my-cluster-secondary \ - --region=europe-west1 \ - --network=default \ - --primary-cluster=projects/my-project/locations/us-central1/clusters/my-cluster - -# Create secondary instance -gcloud alloydb instances create secondary-instance \ - 
--cluster=my-cluster-secondary \ - --region=europe-west1 \ - --instance-type=SECONDARY \ - --cpu-count=4 -``` - -## Connecting - -```bash -# From a GCE VM in the same VPC -psql "host=ALLOYDB_IP dbname=postgres user=postgres sslmode=require" - -# Using AlloyDB Auth Proxy (recommended for external access) -./alloydb-auth-proxy \ - "projects/my-project/locations/us-central1/clusters/my-cluster/instances/my-primary" \ - --port=5432 - -# Then connect locally -psql "host=127.0.0.1 port=5432 dbname=postgres user=postgres" -``` - -## Terraform Example - -```hcl -resource "google_alloydb_cluster" "default" { - cluster_id = "my-cluster" - location = "us-central1" - network_config { - network = google_compute_network.default.id - } - initial_user { - password = var.db_password - } -} - -resource "google_alloydb_instance" "primary" { - cluster = google_alloydb_cluster.default.name - instance_id = "my-primary" - instance_type = "PRIMARY" - machine_config { - cpu_count = 4 - } -} -``` diff --git a/plugins/flow/skills/angular/SKILL.md b/plugins/flow/skills/angular/SKILL.md deleted file mode 100644 index 2655c73..0000000 --- a/plugins/flow/skills/angular/SKILL.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -name: angular -description: "Use when editing Angular projects, angular.json, *.component.ts files, @Component code, signals, standalone components, control-flow blocks, Angular migrations, or Angular version-specific APIs." ---- - -# Angular Framework Skill - - - -## Quick Reference - -### Standalone Component with Signals - - - -```typescript -import { Component, signal, computed, effect, input, output } from '@angular/core'; - -@Component({ - selector: 'app-item-list', - standalone: true, - imports: [], - template: ` -

{{ title() }} ({{ count() }})

-
    - @for (item of items(); track item.id) { -
  • {{ item.name }}
  • - } -
- ` -}) -export class ItemListComponent { - // Input signals - title = input.required(); - items = input.required(); - - // Output - itemSelected = output(); - - // Local state - selected = signal(null); - - // Computed - count = computed(() => this.items().length); - - constructor() { - effect(() => { - console.log('Selected:', this.selected()); - }); - } - - selectItem(item: Item) { - this.selected.set(item); - this.itemSelected.emit(item); - } -} -``` - -
- -### Control Flow (Angular 17+) - - - -```html - -@if (loading()) { - -} @else if (error()) { - -} @else { - -} - - -@for (item of items(); track item.id; let i = $index, first = $first) { -
{{ i + 1 }}. {{ item.name }}
-} @empty { -

No items found

-} - - -@switch (status()) { - @case ('loading') { } - @case ('error') { } - @default { } -} - - -@defer (on viewport) { - -} @loading (minimum 200ms) { - -} -``` - -
- -### Services with Inject - - - -```typescript -import { Injectable, inject } from '@angular/core'; -import { HttpClient } from '@angular/common/http'; -import { toSignal } from '@angular/core/rxjs-interop'; - -@Injectable({ providedIn: 'root' }) -export class ItemService { - private http = inject(HttpClient); - - items = toSignal(this.http.get('/api/items'), { - initialValue: [] - }); - - async create(item: CreateItemDto): Promise { - return await firstValueFrom( - this.http.post('/api/items', item) - ); - } -} -``` - - - -### Resource API (Experimental) - - -## Guardrails - -- **Always use Standalone Components** -- This is the standard for modern Angular (17+); simplifies architecture and improves tree-shaking. -- **Prefer Signals for local and shared state** -- Signals provide a more predictable and efficient reactivity model than observables for most UI state. -- **Use `inject()` instead of constructor injection** -- More concise, better type inference, and works seamlessly with functional-style code. -- **Verify control flow syntax** -- Use `@if`, `@for`, and `@switch` instead of structural directives (`*ngIf`, `*ngFor`). -- **`resource()` and `httpResource()` are experimental** -- Use only when the project explicitly accepts experimental APIs; otherwise, use `HttpClient` with `toSignal()`. -- **Defer heavy or below-the-fold content with `@defer`** -- Use it to lazy load heavy or non-critical components to optimize initial bundle size. 
- - - -## Validation Checkpoint - -- [ ] Component is marked as `standalone: true` -- [ ] New control flow syntax (`@if`, `@for`) is used instead of legacy structural directives -- [ ] Signals are used for reactive state (`signal`, `computed`, `input`) -- [ ] Dependency injection uses the `inject()` function -- [ ] `@for` loops have a meaningful `track` expression -- [ ] Heavy components or those below the fold use `@defer` for lazy loading - diff --git a/plugins/flow/skills/angular/agents/openai.yaml b/plugins/flow/skills/angular/agents/openai.yaml deleted file mode 100644 index 00b3be3..0000000 --- a/plugins/flow/skills/angular/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Angular" - short_description: "Modern Angular components, signals, standalone APIs, and migration guidance" diff --git a/plugins/flow/skills/angular/references/litestar_vite.md b/plugins/flow/skills/angular/references/litestar_vite.md deleted file mode 100644 index 8e31f25..0000000 --- a/plugins/flow/skills/angular/references/litestar_vite.md +++ /dev/null @@ -1,77 +0,0 @@ -# Litestar-Vite Integration - -This section is project-specific integration guidance. For plain Angular projects, use standard Angular CLI / Vite workflows. - -## SPA Router Configuration - -When operating in SPA mode (`mode="spa"`), routing is managed via the Angular Router on the frontend instead of the server resolving HTML endpoints. Configure your router with `provideRouter` and client-side specific options (e.g. hash routing if fallback is missing, or standard HTML5 path routing supported by Litestar's SPA routing mode). 
- -```typescript -import { ApplicationConfig, provideZoneChangeDetection } from '@angular/core'; -import { provideRouter, withComponentInputBinding } from '@angular/router'; -import { routes } from './app.routes'; - -export const appConfig: ApplicationConfig = { - providers: [ - provideZoneChangeDetection({ eventCoalescing: true }), - provideRouter(routes, withComponentInputBinding()) - ] -}; -``` - -## Setup with VitePlugin - -```python -# Python backend -from litestar import Litestar -from litestar_vite import ViteConfig, VitePlugin - -vite_config = ViteConfig( - mode="spa", - paths=PathConfig(resource_dir="src"), -) - -app = Litestar(plugins=[VitePlugin(config=vite_config)]) -``` - -```typescript -// vite.config.ts -import { defineConfig } from 'vite'; -import angular from '@analogjs/vite-plugin-angular'; -import { litestarVitePlugin } from 'litestar-vite-plugin'; - -export default defineConfig({ - plugins: [ - angular(), - litestarVitePlugin({ input: ['src/main.ts'] }), - ], -}); -``` - -## Using Generated Types - -```typescript -import { route } from './generated/routes'; -import type { components } from './generated/schemas'; - -type User = components['schemas']['User']; - -@Component({ ... 
}) -export class UserComponent { - private http = inject(HttpClient); - - loadUser(id: number) { - // Type-safe route building - return this.http.get(route('users:get', { id })); - } -} -``` - -## CLI Commands - -```bash -litestar assets install # Install deps (NOT npm install) -litestar assets serve # Dev server (NOT ng serve) -litestar assets build # Production build -litestar assets generate-types # Generate TS types -``` diff --git a/plugins/flow/skills/bash/SKILL.md b/plugins/flow/skills/bash/SKILL.md deleted file mode 100644 index 9441e6d..0000000 --- a/plugins/flow/skills/bash/SKILL.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -name: bash -description: "Use when editing shell scripts, .sh files, bash shebangs, CLI automation, text processing pipelines, shell error handling, quoting, traps, functions, or portable Bash patterns." ---- - -# Bash Scripting - -## Overview - -Bash is the default shell on most Linux distributions. This skill covers idiomatic scripting patterns following the Google Shell Style Guide, with emphasis on safety, readability, and maintainability. - -## Quick Reference - -### Safety Header (always include) - -```bash -#!/usr/bin/env bash -set -euo pipefail -IFS=$'\n\t' -``` - -| Flag | Effect | -|------|--------| -| `set -e` | Exit immediately on non-zero return | -| `set -u` | Error on unset variables | -| `set -o pipefail` | Pipe returns rightmost non-zero exit code | -| `IFS=$'\n\t'` | Safer word splitting (no space splitting) | - -### Style Essentials - -| Rule | Good | Bad | -|------|------|-----| -| Function declaration | `my_func() { ... }` | `function my_func { ... 
}` | -| Local variables | `local file_path="$1"` | `file_path=$1` | -| Constants | `readonly MAX_RETRIES=3` | `MAX_RETRIES=3` | -| Variable expansion | `"${var}"` | `$var` | -| Command substitution | `"$(command)"` | `` `command` `` | -| Declare + assign | `local out; out="$(cmd)"` | `local out="$(cmd)"` | -| File test | `[[ -f "${file}" ]]` | `[ -f $file ]` | - -### Common ShellCheck Fixes - -| Code | Issue | Fix | -|------|-------|-----| -| SC2086 | Unquoted variable | Double-quote: `"${var}"` | -| SC2046 | Unquoted command sub | Quote or use `mapfile` | -| SC2155 | Declare and assign together | Separate into two statements | -| SC2034 | Unused variable | Add `export` or `# shellcheck disable=SC2034` | - - - -## Workflow - -### Step 1: Start with the Safety Header - -Every script begins with the shebang, strict mode, and a usage comment block describing purpose, usage, and examples. - -### Step 2: Define Functions - -Organize logic into functions. Use `local` for all function-scoped variables. Use `main()` as the entry point, called at the bottom with `main "$@"`. - -### Step 3: Handle Arguments - -Use `getopts` for simple flags, or manual `while [[ $# -gt 0 ]]` parsing for long options. Always validate required arguments and print usage on error. - -### Step 4: Add Cleanup Traps - -Use `trap cleanup EXIT` for any script that creates temporary files, acquires locks, or needs to restore state on failure. - -### Step 5: Run ShellCheck - -Validate the script with `shellcheck script.sh` before committing. Fix all warnings; disable specific rules only with a justifying comment. 
- - - - - -## Guardrails - -- **Always quote variables** — unquoted variables cause word splitting and glob expansion bugs; use `"${var}"` everywhere -- **Always use ShellCheck** — run `shellcheck` on every script; it catches the majority of common bash pitfalls -- **Prefer functions over inline code** — functions with `local` variables prevent accidental global state leaks -- **Never use `eval`** unless absolutely necessary — it is the most common source of injection vulnerabilities in shell scripts -- **Use `[[ ]]` not `[ ]`** — double brackets prevent word splitting and support regex matching -- **Use `mktemp` for temporary files** — never hardcode `/tmp/myscript.tmp`; it creates race conditions -- **Avoid parsing `ls` output** — use globs (`*.txt`) or `find` with `-print0` and `read -d ''` for safe file iteration - - - - - -### Validation Checkpoint - -Before delivering a script, verify: - -- [ ] Script starts with `#!/usr/bin/env bash` and `set -euo pipefail` -- [ ] All variables are quoted with `"${var}"` -- [ ] All function variables use `local` -- [ ] `trap cleanup EXIT` is set if the script creates temporary resources -- [ ] ShellCheck passes with no unacknowledged warnings -- [ ] Script has a usage/help function accessible via `-h` or `--help` - - - - - -## Example - -A safe script template with error handling, argument parsing, and cleanup: - -```bash -#!/usr/bin/env bash -# -# Deploy an application to the target environment. 
-# -# Usage: -# deploy.sh [-v] [-e environment] -# -set -euo pipefail -IFS=$'\n\t' - -readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -readonly TMPDIR="$(mktemp -d)" - -cleanup() { - rm -rf "${TMPDIR}" -} -trap cleanup EXIT - -usage() { - cat < - -Options: - -v Verbose output - -e ENVIRONMENT Target environment (default: staging) - -h Show this help -EOF -} - -main() { - local verbose=false - local environment="staging" - - while getopts ":ve:h" opt; do - case "${opt}" in - v) verbose=true ;; - e) environment="${OPTARG}" ;; - h) usage; exit 0 ;; - :) echo "Error: -${OPTARG} requires an argument" >&2; exit 1 ;; - ?) echo "Error: Unknown option -${OPTARG}" >&2; exit 1 ;; - esac - done - shift $((OPTIND - 1)) - - if [[ $# -eq 0 ]]; then - echo "Error: app_name is required" >&2 - usage >&2 - exit 1 - fi - - local app_name="$1" - - if [[ "${verbose}" == true ]]; then - echo "Deploying ${app_name} to ${environment}..." - fi - - # Build and deploy logic here - echo "Deployed ${app_name} to ${environment} successfully." -} - -main "$@" -``` - - - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Style Guide](references/style.md)** - - Google Shell Style Guide patterns: file headers, function naming, variable naming, quoting rules, error handling. -- **[Common Patterns](references/patterns.md)** - - Argument parsing (getopts), trap for cleanup, here-docs, process substitution, array manipulation, associative arrays. -- **[Safety & Defensive Scripting](references/safety.md)** - - Shellcheck compliance, avoiding common pitfalls, handling spaces in filenames, proper exit codes, signal handling. - ---- - -## Official References - -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Bash](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/bash.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/bash/agents/openai.yaml b/plugins/flow/skills/bash/agents/openai.yaml deleted file mode 100644 index 5339d94..0000000 --- a/plugins/flow/skills/bash/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Bash" - short_description: "Shell scripting with quoting, safety, error handling, and Google style patterns" diff --git a/plugins/flow/skills/bash/references/patterns.md b/plugins/flow/skills/bash/references/patterns.md deleted file mode 100644 index 655588c..0000000 --- a/plugins/flow/skills/bash/references/patterns.md +++ /dev/null @@ -1,275 +0,0 @@ -# Common Bash Patterns - -## Argument Parsing with getopts - -```bash -#!/usr/bin/env bash -set -euo pipefail - -usage() { - cat <&2; exit 1 ;; - ?) echo "Error: Unknown option -${OPTARG}" >&2; exit 1 ;; - esac - done - shift $((OPTIND - 1)) - - if [[ $# -eq 0 ]]; then - echo "Error: input_file required" >&2 - usage >&2 - exit 1 - fi - - local input_file="$1" - # ... use ${verbose}, ${output}, ${count}, ${input_file} -} - -main "$@" -``` - -## Long Options (Manual Parsing) - -```bash -main() { - local verbose=false - local output="" - local dry_run=false - - while [[ $# -gt 0 ]]; do - case "$1" in - -v|--verbose) verbose=true; shift ;; - -o|--output) output="$2"; shift 2 ;; - --dry-run) dry_run=true; shift ;; - -h|--help) usage; exit 0 ;; - --) shift; break ;; - -*) echo "Unknown option: $1" >&2; exit 1 ;; - *) break ;; - esac - done - - local -a positional=("$@") - # ... -} -``` - -## Trap for Cleanup - -```bash -cleanup() { - local exit_code=$? 
- rm -rf "${TEMP_DIR:-}" - exit "${exit_code}" -} - -main() { - trap cleanup EXIT - - TEMP_DIR="$(mktemp -d)" - # Work in TEMP_DIR; cleanup runs automatically on exit - cp important_file "${TEMP_DIR}/" - process "${TEMP_DIR}/important_file" -} -``` - -## Here-Documents - -```bash -# Standard here-doc -cat <" >&2 - exit "${EX_USAGE}" - fi - - local input="$1" - if [[ ! -f "${input}" ]]; then - echo "Error: File not found: ${input}" >&2 - exit "${EX_NOINPUT}" - fi -} -``` - -## Signal Handling - -```bash -#!/usr/bin/env bash -set -euo pipefail - -TEMP_DIR="" -CHILD_PID="" - -cleanup() { - local exit_code=$? - # Kill child process if running - if [[ -n "${CHILD_PID}" ]]; then - kill "${CHILD_PID}" 2>/dev/null || true - wait "${CHILD_PID}" 2>/dev/null || true - fi - # Remove temp files - if [[ -n "${TEMP_DIR}" && -d "${TEMP_DIR}" ]]; then - rm -rf "${TEMP_DIR}" - fi - exit "${exit_code}" -} - -trap cleanup EXIT ERR -trap 'echo "Interrupted" >&2; exit 130' INT -trap 'echo "Terminated" >&2; exit 143' TERM - -main() { - TEMP_DIR="$(mktemp -d)" - long_running_command & - CHILD_PID=$! - wait "${CHILD_PID}" - CHILD_PID="" -} - -main "$@" -``` - -## Avoiding Common Pitfalls - -### Don't Parse ls - -```bash -# Bad -for f in $(ls *.txt); do - -# Good -for f in *.txt; do - [[ -e "${f}" ]] || continue # Handle no matches -``` - -### Don't Use eval - -```bash -# Bad: security risk -eval "${user_input}" - -# Good: use arrays for dynamic commands -local -a cmd=("curl" "-s" "-H" "Authorization: Bearer ${token}" "${url}") -"${cmd[@]}" -``` - -### Check Command Existence - -```bash -require_command() { - local cmd="$1" - if ! command -v "${cmd}" &>/dev/null; then - echo "Error: '${cmd}' is required but not installed." 
>&2 - exit 1 - fi -} - -require_command jq -require_command curl -``` - -### Safe Temporary Files - -```bash -# Always use mktemp -local tmp_file -tmp_file="$(mktemp)" - -local tmp_dir -tmp_dir="$(mktemp -d)" - -# Never use predictable names in /tmp (symlink attacks) -# Bad: /tmp/myapp.log -``` - -### Arithmetic Safety - -```bash -# Use (( )) for arithmetic, not expr -(( count++ )) -(( total = a + b )) - -if (( count > max )); then - echo "Exceeded maximum" -fi - -# Integer validation -is_integer() { - local value="$1" - [[ "${value}" =~ ^-?[0-9]+$ ]] -} -``` - -### Subshell Variable Scope - -```bash -# Bug: variable set in pipe subshell is lost -count=0 -echo -e "a\nb\nc" | while read -r line; do - (( count++ )) # This modifies count in subshell -done -echo "${count}" # Still 0! - -# Fix: use process substitution -count=0 -while read -r line; do - (( count++ )) -done < <(echo -e "a\nb\nc") -echo "${count}" # 3 -``` - -## Logging Pattern - -```bash -readonly LOG_LEVEL="${LOG_LEVEL:-INFO}" - -log() { - local level="$1"; shift - local timestamp - timestamp="$(date +'%Y-%m-%d %H:%M:%S')" - echo "[${timestamp}] [${level}] $*" >&2 -} - -log_debug() { [[ "${LOG_LEVEL}" == "DEBUG" ]] && log "DEBUG" "$@" || true; } -log_info() { log "INFO" "$@"; } -log_warn() { log "WARN" "$@"; } -log_error() { log "ERROR" "$@"; } - -# Usage -log_info "Starting process" -log_error "Failed to connect to ${host}" -``` - -## Retry Pattern - -```bash -retry() { - local max_attempts="$1"; shift - local delay="$1"; shift - local attempt=1 - - while (( attempt <= max_attempts )); do - if "$@"; then - return 0 - fi - log_warn "Attempt ${attempt}/${max_attempts} failed. Retrying in ${delay}s..." 
- sleep "${delay}" - (( attempt++ )) - done - - log_error "All ${max_attempts} attempts failed" - return 1 -} - -# Usage -retry 3 5 curl -sf "https://api.example.com/health" -``` diff --git a/plugins/flow/skills/bash/references/style.md b/plugins/flow/skills/bash/references/style.md deleted file mode 100644 index b47e2e1..0000000 --- a/plugins/flow/skills/bash/references/style.md +++ /dev/null @@ -1,227 +0,0 @@ -# Bash Style Guide - -Based on the [Google Shell Style Guide](https://google.github.io/styleguide/shellguide.html). - -## File Header - -```bash -#!/usr/bin/env bash -# -# Brief description of the script's purpose. -# -# Usage: -# script_name.sh [options] -# -# Examples: -# script_name.sh --verbose /path/to/input -``` - -## Strict Mode - -Always start scripts with: - -```bash -set -euo pipefail -``` - -| Flag | Meaning | -|------|---------| -| `-e` | Exit on error | -| `-u` | Error on undefined variables | -| `-o pipefail` | Pipe fails if any command fails | - -## Function Naming - -Use lowercase with underscores. Use `::` for namespacing in libraries. - -```bash -# Simple function -process_file() { - local file="$1" - # ... -} - -# Namespaced (for libraries) -mylib::validate_input() { - local input="$1" - # ... -} -``` - -## Function Declarations - -Use the `name()` form, not `function name`. - -```bash -# Good -my_function() { - local arg1="$1" - local arg2="$2" - # ... -} - -# Bad -function my_function { - # ... 
-} -``` - -## Variable Naming - -```bash -# Local variables: lowercase with underscores -local file_path="/tmp/data.txt" -local line_count=0 - -# Global/environment variables: uppercase with underscores -readonly MAX_RETRIES=3 -export DATABASE_URL="postgresql://localhost/mydb" - -# Constants: uppercase, declared readonly -readonly CONFIG_DIR="/etc/myapp" -readonly VERSION="1.0.0" -``` - -## Quoting Rules - -**Always quote variables unless you specifically need word splitting or glob expansion.** - -```bash -# Always quote -echo "${name}" -cp "${source_file}" "${dest_dir}/" -if [[ "${status}" == "active" ]]; then - -# Quote command substitutions -local output -output="$(some_command)" - -# Arrays: quote expansions -local -a files=("file one.txt" "file two.txt") -for f in "${files[@]}"; do - process_file "${f}" -done - -# No quotes needed for: -# - Arithmetic: (( count++ )) -# - [[ ]] pattern matching right side: [[ "${str}" == *.txt ]] -``` - -## Brace Expansion for Variables - -```bash -# Use ${var} when: -# - Adjacent to other characters -echo "${prefix}_suffix" -echo "path/${dir}/file" - -# Plain $var is acceptable when unambiguous -echo "$HOME" -``` - -## Conditionals - -Use `[[ ]]` instead of `[ ]`: - -```bash -# String comparison -if [[ "${answer}" == "yes" ]]; then - echo "Confirmed" -fi - -# Pattern matching -if [[ "${filename}" == *.tar.gz ]]; then - tar xzf "${filename}" -fi - -# Regex matching -if [[ "${email}" =~ ^[a-zA-Z0-9.]+@[a-zA-Z0-9.]+$ ]]; then - echo "Valid email" -fi - -# Numeric comparison (use (( ))) -if (( count > 10 )); then - echo "Too many" -fi -``` - -## Error Handling - -```bash -# Print errors to stderr -err() { - echo "[ERROR] $(date +'%Y-%m-%d %H:%M:%S') $*" >&2 -} - -# Usage -if ! do_something; then - err "Failed to do something" - exit 1 -fi -``` - -## Return Values - -Use `return` for functions, `exit` for scripts. Prefer returning status codes over echoing "true"/"false". 
- -```bash -is_valid_file() { - local file="$1" - [[ -f "${file}" && -r "${file}" ]] -} - -if is_valid_file "${path}"; then - process_file "${path}" -fi -``` - -## Indentation & Formatting - -- Use 2 spaces for indentation (no tabs). -- Maximum line length: 80 characters. -- Use `\` for line continuation. - -```bash -long_command \ - --flag1 "value1" \ - --flag2 "value2" \ - --flag3 "value3" -``` - -## main() Pattern - -```bash -#!/usr/bin/env bash -set -euo pipefail - -readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -usage() { - echo "Usage: $(basename "$0") [options] " - echo "Options:" - echo " -v, --verbose Enable verbose output" - echo " -h, --help Show this help" -} - -main() { - local verbose=false - - while [[ $# -gt 0 ]]; do - case "$1" in - -v|--verbose) verbose=true; shift ;; - -h|--help) usage; exit 0 ;; - *) break ;; - esac - done - - if [[ $# -eq 0 ]]; then - err "Missing required argument" - usage - exit 1 - fi - - # Script logic here -} - -main "$@" -``` diff --git a/plugins/flow/skills/biome/SKILL.md b/plugins/flow/skills/biome/SKILL.md deleted file mode 100644 index 8335814..0000000 --- a/plugins/flow/skills/biome/SKILL.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -name: biome -description: "Use when editing biome.json or biome.jsonc, configuring Biome linting or formatting, troubleshooting Biome diagnostics, setting ignores or overrides, or replacing ESLint/Prettier in JS and TS projects." ---- - -# Biome Skill - -## Overview - -Expert knowledge for Biome, an extremely fast toolchain for web projects (replaces ESLint and Prettier). - ---- - - - -## References Index - -For detailed guides on configurations and overrides: - -- **[Standard Configuration](references/config.md)** - - Formatter, linter rules, and JS style setups. -- **[Linter Overrides](references/overrides.md)** - - Overrides for UI components (ShadCN), routing files, and generated code modules. 
- - - ---- - - - -## Example Configuration - -Minimal `biome.json` with workspace overrides: - -```json -{ - "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", - "organizeImports": { "enabled": true }, - "formatter": { - "indentStyle": "space", - "indentWidth": 2, - "lineWidth": 100 - }, - "linter": { - "enabled": true, - "rules": { "recommended": true } - }, - "overrides": [ - { - "include": ["**/*.generated.ts"], - "linter": { "enabled": false } - } - ] -} -``` - - - ---- - -## Official References - -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Biome](https://github.com/cofin/flow/blob/main/templates/styleguides/tools/biome.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. - diff --git a/plugins/flow/skills/biome/agents/openai.yaml b/plugins/flow/skills/biome/agents/openai.yaml deleted file mode 100644 index 14f445c..0000000 --- a/plugins/flow/skills/biome/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Biome" - short_description: "Biome lint, format, workspace config, ignores, and overrides" diff --git a/plugins/flow/skills/biome/references/config.md b/plugins/flow/skills/biome/references/config.md deleted file mode 100644 index 2883f13..0000000 --- a/plugins/flow/skills/biome/references/config.md +++ /dev/null @@ -1,53 +0,0 @@ -# Biome Configuration - -Standard configuration for Biome linter and formatter. 
- -## Core Settings - -### Formatter - -- **Enabled**: `true` -- **Indent Width**: `2` -- **Line Width**: `180` -- **Indent Style**: `space` -- **Line Ending**: `lf` - -### Javascript Formatter - -- **Quote Style**: `"double"` -- **JSX Quote Style**: `"double"` -- **Semicolons**: `"asNeeded"` - -### Linter - -- **Enabled**: `true` -- **Recommended**: `true` - ---- - -## Linter Rules (Adjustments) - -| Group | Rule | Status | Rationale | -|-------|------|--------|-----------| -| `suspicious` | `noExplicitAny` | `off` | Flexibility in type definitions | -| `suspicious` | `noUnknownAtRules` | `off` | CSS custom rules support | -| `complexity` | `noForEach` | `off` | Standard JS iteration | -| `a11y` | `noSvgWithoutTitle` | `off` | Icon libraries often omit titles | -| `nursery` | `useSortedClasses` | `warn` | Tailwind/CSS class sorting | - ---- - -## Setup with UV - -To run Biome inside a project environment: - -```bash -# Run linter -uv run biome lint src/ - -# Run formatter check -uv run biome format src/ - -# Apply fixes -uv run biome check --apply src/ -``` diff --git a/plugins/flow/skills/biome/references/overrides.md b/plugins/flow/skills/biome/references/overrides.md deleted file mode 100644 index a796c4d..0000000 --- a/plugins/flow/skills/biome/references/overrides.md +++ /dev/null @@ -1,45 +0,0 @@ -# Biome Linter Overrides - -Specific configurations to apply overrides for generated code, UI components, and routing structures. - -## 1. Generated Code - -Directories: `src/lib/api/**/*`, `src/lib/generated/**/*` - -- **Status**: **DISABLED** -- **Reason**: Generated code should not be linted or formatted to avoid conflicts and speed up checks. - ---- - -## 2. UI Components (ShadCN) - -Directory: `src/components/ui/**/*` - -Overrides to accommodate standard ShadCN/ui component code styles. 
- -| Group | Rule | Status | Rationale | -|-------|------|--------|-----------| -| `performance` | `noNamespaceImport` | `off` | Standard in some components | -| `style` | `noNestedTernary` | `off` | Common in render logic | -| `suspicious` | `noArrayIndexKey` | `off` | Edge cases in lists | -| `nursery` | `noShadow` | `off` | Component scope variables | - ---- - -## 3. Routes (TanStack Router) - -Directory: `src/routes/**/*` - -| Group | Rule | Status | Rationale | -|-------|------|--------|-----------| -| `style` | `useFilenamingConvention` | `off` | File-based routing requires specific naming structures (e.g., `+page.tsx` or uppercase titles). | - ---- - -## 4. Barrel Files - -Files: `src/lib/auth/index.ts`, `src/stores/index.ts` - -| Group | Rule | Status | Rationale | -|-------|------|--------|-----------| -| `performance` | `noBarrelFile` | `off` | Necessary for clean public API grouping. | diff --git a/plugins/flow/skills/bun/SKILL.md b/plugins/flow/skills/bun/SKILL.md deleted file mode 100644 index 5c83e4c..0000000 --- a/plugins/flow/skills/bun/SKILL.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -name: bun -description: "Use when using bun.lockb, bunfig.toml, bun install, Bun workspaces, Bun test, Bun runtime APIs, JS or TS scripts run with Bun, bundling, or Node compatibility checks for Bun." ---- - -# Bun Skill - - - -## Core Capabilities - -### 1. Runtime - -Bun is a drop-in replacement for Node.js, focused on speed. - -- **HTTP Server**: `Bun.serve()` is faster than Node's `http` module. -- **File I/O**: `Bun.file()` and `Bun.write()` are optimized. -- **TypeScript**: Native support (no transpilation step needed for dev). - -### 2. Task & Test Runner - -- **Run Scripts**: `bun run script.ts` replaces `ts-node`. -- **Test**: `bun test` is a Jest-compatible, ultra-fast test runner. - - - - ```bash - bun test --watch - ``` - - - -- **Package Manager**: `bun install` is significantly faster than npm/yarn. 
- -## High Performance & Integration Patterns (Vertebra) - -This section details how to integrate Bun into high-performance, polyglot systems. - -### 1. Inter-Process Communication (IPC) - -When integrating with Rust/Python backends: - -- **Shared Memory (ShmRing)**: For latency < 10us, avoid piping JSON over stdout/stdin. Use shared memory ring buffers. - - *Pattern*: Pointers/offsets only passed over socket; data stays in shared memory. -- **Unix Domain Sockets (UDS)**: Use `Bun.connect()` and `Bun.listen()` with abstract namespaces (Linux) or file paths (macOS) if Shm not available. -- **Serialization**: - - Avoid `JSON.stringify` on hot paths. - - Use **Msgspec** (via bindings) or **Apache Arrow** (via `apache-arrow` js package) for zero-copy structure sharing. - -### 2. Native Bindings (FFI vs N-API) - -- **N-API (`napi-rs`)**: Preferred for stability and complex logic. It maps Rust Structs to JS Classes easily. -- **Bun FFI (`bun:ffi`)**: faster for simple C function calls but harder to maintain for complex objects. - - *Recommendation*: Use `napi-rs` for business logic, `bun:ffi` only for ultra-thin C wrappers. - -### 3. Performance Gotchas - -- **Buffer Copying**: Be careful with `Buffer` vs `Uint8Array`. Node compatibility layers might copy. Use `Uint8Array` natively where possible. -- **Streams**: `Bun.serve()` relies on `ReadableStream`. Buffering the entire request body (`await req.text()`) defeats the purpose of streaming; process chunks if possible. -- **Garbage Collection**: In tight loops, avoid allocating objects. Re-use objects or use typed arrays to keep pressure off the GC. - -## Best Practices - -- **Linting**: Use **Biome** (`bunx @biomejs/biome`) for instant linting/formatting. -- **Globals**: Use `Bun.env`, `Bun.sleep`, but generally avoid Node.js globals unless necessary for library compatibility. -- **Lockfile**: Commit `bun.lockb` for deterministic builds. 
- - - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [TypeScript](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/typescript.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. - diff --git a/plugins/flow/skills/bun/agents/openai.yaml b/plugins/flow/skills/bun/agents/openai.yaml deleted file mode 100644 index c2af5fb..0000000 --- a/plugins/flow/skills/bun/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Bun" - short_description: "Bun runtime, package manager, workspaces, tests, and bundling patterns" diff --git a/plugins/flow/skills/cloud-run/SKILL.md b/plugins/flow/skills/cloud-run/SKILL.md deleted file mode 100644 index e1dab2c..0000000 --- a/plugins/flow/skills/cloud-run/SKILL.md +++ /dev/null @@ -1,251 +0,0 @@ ---- -name: cloud-run -description: "Use when deploying containers to Google Cloud Run, editing service.yaml, using gcloud run, configuring Cloud Run Jobs, scaling, concurrency, traffic splitting, cold starts, networking, or serverless Dockerfiles." ---- - -# Google Cloud Run Skill - -## Overview - -Cloud Run is a fully managed serverless platform for running containerized applications. It automatically scales from zero to N based on incoming requests and charges only for resources used during request processing. - -## Quick Reference - -### Deployment Pipeline - -1. **Write Dockerfile** — multi-stage build with non-root user -2. **Build image** — `gcloud builds submit --tag gcr.io/PROJECT/IMAGE:TAG` -3. **Deploy service** — `gcloud run deploy SERVICE --image=IMAGE_URL --region=REGION` -4. 
**Manage traffic** — `gcloud run services update-traffic SERVICE --to-latest` - -### Key Service Configuration - -| Setting | Flag | Recommendation | -|---|---|---| -| CPU | `--cpu=N` | 1-8 vCPUs; start with 1 | -| Memory | `--memory=NGi` | 256Mi-32Gi; match to workload | -| Concurrency | `--concurrency=N` | 80 default; lower for memory-heavy handlers | -| Min instances | `--min-instances=N` | 1+ for production to avoid cold starts | -| Max instances | `--max-instances=N` | Set a ceiling to control costs | -| Timeout | `--timeout=N` | Up to 3600s for services, 86400s for jobs | -| CPU allocation | `--cpu-throttling=false` | Use for WebSockets, background tasks | - -### Services vs Jobs - -| Feature | Services | Jobs | -|---------|----------|------| -| Purpose | HTTP request handling | Batch/scheduled tasks | -| Scaling | Auto-scales with traffic | Runs to completion | -| Timeout | Up to 60 minutes | Up to 24 hours | -| Command | `gcloud run deploy` | `gcloud run jobs deploy` | - -### GPU (NVIDIA L4) - -```bash -gcloud run deploy SERVICE \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi \ - --concurrency=4 -``` - -Minimum: 4 CPU, 16 GiB. Recommended: 8 CPU, 32 GiB. Set `--concurrency` explicitly — no GPU-based autoscaling. See [references/gpu.md](references/gpu.md) for RTX PRO 6000 Blackwell, driver details, and ML inference patterns. - -### Production Networking & Secrets - -**Direct VPC Egress** — route to AlloyDB/Cloud SQL private IPs without VPC connector overhead: - -```bash -gcloud run deploy SERVICE \ - --vpc-egress=private-ranges-only \ - --network=NETWORK \ - --subnet=SUBNET -``` - -**Secret mounting:** - -```bash ---set-secrets=KEY=SECRET_NAME:latest -``` - -**Env var separator trick** — use `^||^` when values contain commas (e.g., JSON arrays in CORS origins): - -```bash ---set-env-vars=^||^CORS_ORIGINS=["https://app.example.com","https://api.example.com"]||OTHER_KEY=value -``` - -**CORS origin reconciliation workflow:** - -1. 
Auto-discover Cloud Run service URL (`gcloud run services describe`) -2. Discover GKE LB IP and Cloud Shell preview URLs -3. Merge with existing allowed origins, deduplicate -4. Update the secret: `gcloud secrets versions add SECRET_NAME --data-file=-` - -**IAP setup summary:** - -1. Create OAuth brand: `gcloud iap oauth-brands create --application_title=APP --support_email=EMAIL` -2. Grant IAP service identity: `gcloud projects add-iam-policy-binding PROJECT --member=serviceAccount:service-PROJECT@gcp-sa-iap.iam.gserviceaccount.com --role=roles/run.invoker` -3. Grant authorized users: `--member=user:EMAIL --role=roles/iap.httpsResourceAccessor` -4. Add deployer to prevent lockout: grant deployer `roles/iap.httpsResourceAccessor` before enabling IAP - -See [references/iap.md](references/iap.md) for full IAP configuration. - - - -## Workflow - -### Step 1: Write the Dockerfile - -Use multi-stage builds (base, builder, runner). Install dependencies in the builder stage, copy only the runtime artifacts to the runner stage. Run as a non-root user. Use `tini` as PID 1 for proper signal handling. - -### Step 2: Build and Push the Image - -Use Cloud Build (`gcloud builds submit`) or a CI pipeline to build and push to Artifact Registry or Container Registry. Tag images with the git SHA for traceability. - -### Step 3: Deploy the Service - -Deploy with `gcloud run deploy`, setting CPU, memory, concurrency, and min/max instances. Use `--no-traffic` for initial test deployments, then shift traffic with `--to-latest` or percentage-based splits. - -### Step 4: Configure Auth and Networking - -Use `--allow-unauthenticated` for public APIs. For internal services, use IAM-based auth. Set up IAP (Identity-Aware Proxy) for user-facing apps that need Google login. Use VPC Connector for access to private resources. - -### Step 5: Tune for Cold Starts - -Set `--min-instances=1` in production. Enable `--cpu-boost` for faster startup. Lazy-load heavy dependencies in application code. 
Pre-compile bytecode for Python. - - - - - -## Guardrails - -- **Always set memory and CPU limits** — without explicit limits, Cloud Run uses defaults that may not match your workload and can cause OOM kills -- **Handle cold starts explicitly** — set `--min-instances=1` for latency-sensitive production services; use `--cpu-boost` for faster startup -- **Use IAP for auth (not custom middleware)** — Cloud Run's built-in IAP integration eliminates custom auth code; see [references/iap.md](references/iap.md) -- **Never store state in the container** — Cloud Run instances are ephemeral; use Cloud Storage, Firestore, or a database for persistent state -- **Set `--max-instances`** to prevent runaway scaling and unexpected billing spikes -- **Use `--concurrency`** to match your application's per-instance capacity — too high causes memory pressure, too low wastes resources -- **Always use a non-root user** in Dockerfiles — Cloud Run supports it and it reduces the blast radius of container escapes -- **Always use Direct VPC egress (not VPC connector) for private DB access** — `--vpc-egress=private-ranges-only` gives direct routing to AlloyDB/Cloud SQL private IPs with lower latency and no connector overhead -- **Set `--concurrency` explicitly for GPU workloads** — Cloud Run cannot auto-scale on GPU utilization; the default of 80 will OOM a GPU instance -- **Download models from GCS, not the container image, for models >10 GB** — keeps image build fast and model updates independent of deployments -- **Use startup probes for slow-starting containers** (GPU model loading) — hold traffic until the model is ready; see [references/volumes.md](references/volumes.md) - - - - - -### Validation Checkpoint - -Before delivering configurations, verify: - -- [ ] Dockerfile uses multi-stage build with non-root user -- [ ] `--memory` and `--cpu` are explicitly set in the deploy command -- [ ] `--min-instances` is set for production services -- [ ] `--max-instances` is set to prevent 
unbounded scaling -- [ ] Authentication strategy is defined (IAM, IAP, or `--allow-unauthenticated`) -- [ ] Service account is specified (not using the default compute SA) - - - - - -## Example - -Minimal Dockerfile and deploy command for a Python web service: - -```dockerfile -# Dockerfile -FROM python:3.13-slim-bookworm AS builder -COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/ -WORKDIR /app -COPY pyproject.toml uv.lock ./ -RUN uv sync --frozen --no-dev --no-install-project -COPY src ./src -RUN uv sync --frozen --no-dev - -FROM python:3.13-slim-bookworm AS runner -RUN apt-get update && apt-get install -y --no-install-recommends tini \ - && rm -rf /var/lib/apt/lists/* -RUN useradd --create-home appuser -USER appuser -COPY --from=builder /app /app -ENV PATH="/app/.venv/bin:$PATH" -ENTRYPOINT ["tini", "--"] -CMD ["uvicorn", "myapp.main:app", "--host", "0.0.0.0", "--port", "8080"] -EXPOSE 8080 -``` - -Deploy command: - -```bash -# Build and push -gcloud builds submit --tag gcr.io/my-project/myapp:latest - -# Deploy with production settings -gcloud run deploy myapp \ - --image=gcr.io/my-project/myapp:latest \ - --region=us-central1 \ - --cpu=1 \ - --memory=512Mi \ - --concurrency=80 \ - --min-instances=1 \ - --max-instances=10 \ - --cpu-boost \ - --service-account=myapp-sa@my-project.iam.gserviceaccount.com \ - --allow-unauthenticated -``` - - - ---- - -> **Note:** No Gemini CLI extension exists for Cloud Run — this skill provides unique value for Cloud Run deployments, GPU workloads, and production networking patterns not covered by other tooling. - -## References Index - -For detailed guides and configuration examples, refer to the following documents in `references/`: - -- **[Services](references/services.md)** - - Service deployment, CLI commands, traffic management, concurrency, scaling, and resource configuration. -- **[Jobs](references/jobs.md)** - - Cloud Run Jobs configuration, execution, and task parallelism. 
-- **[Performance](references/performance.md)** - - Cold start optimization, resource tuning, concurrency guidelines, and cost/performance best practices. -- **[Terraform Configuration](references/terraform.md)** - - IaC examples for services, IAM, and custom domain mappings. -- **[Networking](references/networking.md)** - - Multi-container sidecars, Ingress settings, VPC Connector, and Secrets Management. -- **[Troubleshooting](references/troubleshooting.md)** - - Debugging startup failures, latency, memory issues, and security/reliability best practices. -- **[Dockerfile Patterns](references/dockerfile.md)** - - Multi-stage builds, uv package manager, distroless variants, non-root user setup, and tini init system. -- **[Cloud Build](references/cloudbuild.md)** - - Cloud Build pipelines, cache warming, multi-target builds, tag strategy, and Artifact Registry push patterns. -- **[Identity-Aware Proxy (IAP)](references/iap.md)** - - IAP setup for Cloud Run, JWT validation, user auto-provisioning, environment variables, gcloud commands, and Terraform configuration. -- **[GPU Support](references/gpu.md)** - - NVIDIA L4 and RTX PRO 6000 Blackwell configuration, ML inference best practices, concurrency tuning, and GPU Jobs. -- **[Volumes and Health Checks](references/volumes.md)** - - Cloud Storage FUSE mounts, NFS (Filestore), startup probes, and liveness probes. - ---- - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [GCP Scripting](https://github.com/cofin/flow/blob/main/templates/styleguides/cloud/gcp_scripting.md) -- [Bash](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/bash.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/cloud-run/agents/openai.yaml b/plugins/flow/skills/cloud-run/agents/openai.yaml deleted file mode 100644 index 61ae291..0000000 --- a/plugins/flow/skills/cloud-run/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Cloud Run" - short_description: "Google Cloud Run services, jobs, scaling, containers, networking, and deployments" diff --git a/plugins/flow/skills/cloud-run/references/cloudbuild.md b/plugins/flow/skills/cloud-run/references/cloudbuild.md deleted file mode 100644 index e5b89d2..0000000 --- a/plugins/flow/skills/cloud-run/references/cloudbuild.md +++ /dev/null @@ -1,166 +0,0 @@ -# Cloud Build Patterns - -Production Cloud Build configurations for building and pushing container images to Artifact Registry. - -## Basic Configuration - -```yaml -timeout: "1800s" -options: - machineType: "E2_HIGHCPU_8" -``` - -- **Machine type**: `E2_HIGHCPU_8` is recommended for Python builds with native extensions (e.g., grpcio, cryptography). -- **Timeout**: 30 minutes is a safe default for multi-stage Docker builds with frontend asset compilation. - -## Build Pipeline Steps - -### Step 0: Cache Warming - -Pull the latest image to use as Docker build cache: - -```yaml -steps: - - id: "pull-latest" - name: "gcr.io/cloud-builders/docker" - entrypoint: "bash" - args: - - "-c" - - | - docker pull ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:latest || exit 0 -``` - -The `|| exit 0` ensures the build continues even if no cached image exists (first build). 
- -### Step 1: Build Runner Image - -Build the main application image targeting a specific Dockerfile stage: - -```yaml - - id: "build-runner" - name: "gcr.io/cloud-builders/docker" - args: - - "build" - - "--target" - - "runner" - - "--cache-from" - - "${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:latest" - - "--build-arg" - - "NPM_CONFIG_REGISTRY=${_NPM_CONFIG_REGISTRY}" - - "--build-arg" - - "PIP_INDEX_URL=${_PIP_INDEX_URL}" - - "--build-arg" - - "UV_INDEX_URL=${_UV_INDEX_URL}" - - "-t" - - "${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:${_BRANCH_NAME}-${_SHORT_SHA}" - - "-t" - - "${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:latest" - - "--file" - - "tools/deploy/docker/run/Dockerfile.distroless" - - "." -``` - -### Step 2: Push Images - -Push both the SHA-tagged and latest images to Artifact Registry: - -```yaml - - id: "push-latest" - name: "gcr.io/cloud-builders/docker" - args: - - "push" - - "${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:latest" - - - id: "push-sha" - name: "gcr.io/cloud-builders/docker" - args: - - "push" - - "${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:${_BRANCH_NAME}-${_SHORT_SHA}" -``` - -### Step 3: Build Worker Image (Optional) - -Conditionally build a separate worker target for Cloud Run Jobs: - -```yaml - - id: "build-worker" - name: "gcr.io/cloud-builders/docker" - entrypoint: "bash" - args: - - "-c" - - | - if [ "${_BUILD_WORKER}" = "true" ]; then - docker build \ - --target worker \ - --cache-from ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}:latest \ - -t ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}-worker:${_BRANCH_NAME}-${_SHORT_SHA} \ - -t ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}-worker:latest \ - --file tools/deploy/docker/run/Dockerfile.distroless \ - . 
- docker push ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}-worker:latest - docker push ${_REGION_NAME}-docker.pkg.dev/${_PROJECT_ID}/my-artifacts/${_SERVICE_NAME}-worker:${_BRANCH_NAME}-${_SHORT_SHA} - else - echo "Skipping worker image build (_BUILD_WORKER != true)" - fi - waitFor: ["build-runner"] -``` - -## Tag Strategy - -Two tags per image: - -| Tag | Format | Purpose | -|-----|--------|---------| -| **latest** | `SERVICE:latest` | Cache source for subsequent builds, rolling deployments | -| **branch-sha** | `SERVICE:main-abc1234` | Immutable tag for traceability and rollbacks | - -The immutable tag is constructed from the user-defined substitutions `_BRANCH_NAME` and `_SHORT_SHA` (underscore-prefixed substitutions are user-defined; Cloud Build's built-in equivalents are `$BRANCH_NAME` and `$SHORT_SHA`). - -## Substitution Variables - -Define defaults that can be overridden per trigger: - -```yaml -substitutions: - _REGION_NAME: us-central1 - _BRANCH_NAME: master - _NPM_CONFIG_REGISTRY: "https://registry.npmjs.org" - _PIP_INDEX_URL: "https://pypi.org/simple" - _UV_INDEX_URL: "https://pypi.org/simple" - _BUILD_WORKER: "false" -``` - -Custom index substitutions (`_NPM_CONFIG_REGISTRY`, `_PIP_INDEX_URL`, `_UV_INDEX_URL`) support restricted environments with private package registries or mirrors. - -## Artifact Registry Push Pattern - -Images are pushed to regional Artifact Registry in the format: - -```text -REGION-docker.pkg.dev/PROJECT_ID/REPOSITORY/IMAGE:TAG -``` - -Example: - -```text -us-central1-docker.pkg.dev/my-project/my-artifacts/my-service:main-abc1234 -``` - -## Multi-Target Builds - -When a Dockerfile defines multiple targets (e.g., `runner` and `worker`), use `--target` to build each separately. The worker build can reuse layers from the runner build via `--cache-from` and `waitFor` dependencies. 
- -## Key Patterns - -- **Cache warming**: Always pull latest before building to maximize layer reuse -- **Conditional steps**: Use bash `if` blocks with substitution variables for optional steps -- **waitFor**: Control step ordering; worker builds should wait for runner to complete -- **Build args for registries**: Pass custom package index URLs as build args for air-gapped or mirrored environments -- **Separate worker images**: Use distinct image names (e.g., `service-worker`) for Cloud Run Jobs - -## Official References - -- -- -- -- diff --git a/plugins/flow/skills/cloud-run/references/dockerfile.md b/plugins/flow/skills/cloud-run/references/dockerfile.md deleted file mode 100644 index 7a6aba1..0000000 --- a/plugins/flow/skills/cloud-run/references/dockerfile.md +++ /dev/null @@ -1,250 +0,0 @@ -# Dockerfile Patterns for Cloud Run - -Production-ready Dockerfile patterns for Python applications on Cloud Run, using multi-stage builds with uv package manager. - -## Standard Multi-Stage Build (python-base / builder / runner) - -Three-stage pattern using `python:3.13-slim-bookworm` as the base image. 
- -### Stage 1: python-base - -Install system dependencies and uv package manager: - -```dockerfile -ARG BUILDER_IMAGE=python:3.13-slim-bookworm - -FROM ${BUILDER_IMAGE} AS python-base -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y --no-install-recommends build-essential git tini curl \ - && apt-get autoremove -y \ - && apt-get clean -y \ - && rm -rf /root/.cache /var/apt/lists/* /var/cache/apt/* \ - && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ - && mkdir -p /workspace/app - -# Install uv from official container image -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -``` - -### Stage 2: builder - -Install dependencies and build the application wheel: - -```dockerfile -FROM python-base AS builder -ARG UV_INSTALL_ARGS="--no-dev" - -ENV GRPC_PYTHON_BUILD_WITH_CYTHON=1 \ - UV_LINK_MODE=copy \ - UV_NO_CACHE=1 \ - UV_COMPILE_BYTECODE=1 \ - UV_SYSTEM_PYTHON=1 \ - PATH="/workspace/app/.venv/bin:/usr/local/bin:$PATH" \ - PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONFAULTHANDLER=1 \ - PYTHONHASHSEED=random \ - LANG=C.UTF-8 \ - LC_ALL=C.UTF-8 - -WORKDIR /workspace/app - -# Copy dependency files first for layer caching -COPY pyproject.toml uv.lock README.md ./ - -# Install dependencies without the project itself -RUN uv sync ${UV_INSTALL_ARGS} --frozen --no-install-project --no-editable \ - && uv export ${UV_INSTALL_ARGS} --frozen --no-hashes --output-file=requirements.txt - -# Copy application source and build -COPY src ./src -RUN uv sync ${UV_INSTALL_ARGS} --frozen --no-editable \ - && uv build -``` - -### Stage 3: runner - -Minimal runtime image with non-root user: - -```dockerfile -FROM python-base AS runner -ARG UV_INSTALL_ARGS="--no-dev" -ARG LITESTAR_APP="myapp.asgi:app" - -ENV PATH="/workspace/app/.venv/bin:/usr/local/bin:$PATH" \ - UV_LINK_MODE=copy \ - UV_NO_CACHE=1 \ - UV_COMPILE_BYTECODE=1 \ - UV_SYSTEM_PYTHON=1 \ - PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - 
PYTHONFAULTHANDLER=1 \ - PYTHONHASHSEED=random \ - LANG=C.UTF-8 \ - LC_ALL=C.UTF-8 \ - HOME="/workspace/app" \ - LITESTAR_APP="${LITESTAR_APP}" - -# Non-root user with UID 65532 -RUN addgroup --system --gid 65532 nonroot \ - && adduser --no-create-home --system --uid 65532 nonroot \ - && chown -R nonroot:nonroot /workspace - -COPY --from=builder --chown=65532:65532 /workspace/app/dist /tmp/ -WORKDIR /workspace/app - -RUN uv pip install --quiet --disable-pip-version-check /tmp/*.whl \ - && rm -Rf /tmp/* \ - && chown -R nonroot:nonroot /workspace/app - -USER nonroot -STOPSIGNAL SIGINT -EXPOSE 8080 - -# tini as init system for proper signal handling -ENTRYPOINT ["tini", "--"] -CMD ["app", "run", "--port", "8080", "--host", "0.0.0.0"] -VOLUME /workspace/app -``` - -## Distroless Variant (4-Stage Build) - -Uses `gcr.io/distroless/cc-debian12:nonroot` for maximum security -- no shell, no package manager in the runtime image. - -### Stage Layout - -```text -python-base -> builder -> runtime-prep -> runner (-> worker) -``` - -### Build Arguments and Base Images - -```dockerfile -# syntax=docker/dockerfile:1.7 -ARG PYTHON_VERSION=3.13 -ARG DEBIAN_VERSION=bookworm -ARG BUILDER_IMAGE=python:${PYTHON_VERSION}-slim-${DEBIAN_VERSION} -ARG RUN_IMAGE=gcr.io/distroless/cc-debian12:nonroot -``` - -### Stage 3: runtime-prep (Unique to Distroless) - -Prepares Python interpreter and shared libraries for the distroless base, which has no Python runtime: - -```dockerfile -FROM python-base AS runtime-prep - -# TARGETARCH provided by Docker buildx for multi-arch support -ARG TARGETARCH - -ENV UV_LINK_MODE=copy \ - UV_COMPILE_BYTECODE=1 - -# Create non-root user matching distroless nonroot user -RUN groupadd --system --gid 65532 nonroot \ - && useradd --no-create-home --system --uid 65532 --gid 65532 nonroot \ - && mkdir -p /app \ - && chown -R nonroot:nonroot /app - -# Create venv with COPIED Python (not symlinked) for distroless -RUN python -m venv --copies /app/.venv - -# Install 
dependencies into the venv -COPY --from=builder --chown=65532:65532 /app/requirements.txt /tmp/requirements.txt -COPY --from=builder --chown=65532:65532 /app/dist/*.whl /tmp/ - -RUN uv pip install --quiet --no-cache-dir --no-deps \ - --requirement=/tmp/requirements.txt \ - && uv pip install --quiet --no-cache-dir --no-deps /tmp/*.whl \ - && rm -rf /tmp/* - -# Multi-arch library copying -RUN ARCH_DIR=$([ "$TARGETARCH" = "arm64" ] && echo "aarch64-linux-gnu" || echo "x86_64-linux-gnu") \ - && mkdir -p /runtime-libs/lib /runtime-libs/usr/lib \ - && cp -a /lib/${ARCH_DIR} /runtime-libs/lib/ \ - && cp -a /usr/lib/${ARCH_DIR} /runtime-libs/usr/lib/ -``` - -### Stage 4: Distroless Runner - -```dockerfile -FROM ${RUN_IMAGE} AS runner - -ARG LITESTAR_APP="myapp.asgi:create_app" - -ENV PATH="/app/.venv/bin:/usr/local/bin:$PATH" \ - PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONFAULTHANDLER=1 \ - PYTHONHASHSEED=random \ - LANG=C.UTF-8 \ - LC_ALL=C.UTF-8 \ - LITESTAR_APP="${LITESTAR_APP}" - -# Copy Python interpreter and standard library from runtime-prep -COPY --from=runtime-prep /usr/local/lib/ /usr/local/lib/ -COPY --from=runtime-prep /usr/local/bin/python /usr/local/bin/python -COPY --from=runtime-prep /etc/ld.so.cache /etc/ld.so.cache - -# Copy tini for proper signal handling -COPY --from=runtime-prep /usr/bin/tini /usr/local/bin/tini - -# Copy required shared libraries (architecture-aware) -COPY --from=runtime-prep /runtime-libs/lib/ /lib/ -COPY --from=runtime-prep /runtime-libs/usr/lib/ /usr/lib/ -COPY --from=runtime-prep /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ - -# Copy the application virtual environment -WORKDIR /app -COPY --from=runtime-prep --chown=65532:65532 /app/.venv /app/.venv - -STOPSIGNAL SIGTERM -USER nonroot -EXPOSE 8080 - -ENTRYPOINT ["/usr/local/bin/tini", "--"] -CMD ["app", "server", "run", "--host", "0.0.0.0", "--port", "8080"] -``` - -### Stage 5: Worker Target (Cloud Run Jobs) - -Extends the runner for batch/background 
processing: - -```dockerfile -FROM runner AS worker - -EXPOSE 8080 -HEALTHCHECK NONE -CMD ["app-worker"] -``` - -## Key Environment Variables - -| Variable | Value | Purpose | -|----------|-------|---------| -| `PYTHONUNBUFFERED` | `1` | Disable output buffering for real-time logging | -| `PYTHONFAULTHANDLER` | `1` | Enable faulthandler for debugging crashes | -| `UV_COMPILE_BYTECODE` | `1` | Pre-compile .pyc files for faster startup | -| `UV_NO_CACHE` | `1` | Skip cache to reduce image size | -| `UV_LINK_MODE` | `copy` | Copy files instead of hardlinking (required for multi-stage) | -| `UV_SYSTEM_PYTHON` | `1` | Use system Python instead of managed versions | -| `PYTHONDONTWRITEBYTECODE` | `1` | Prevent runtime .pyc generation (already compiled) | -| `PYTHONHASHSEED` | `random` | Randomize hash seed for security | - -## Key Patterns - -- **Non-root user**: Always run as UID 65532 (`nonroot`) for security -- **tini init system**: Use `tini` as PID 1 for proper signal forwarding and zombie reaping -- **Layer caching**: Copy `pyproject.toml` and `uv.lock` before source code -- **Wheel install**: Build a wheel in the builder stage, install only the wheel in the runner -- **Multi-arch**: Use `TARGETARCH` build arg with Docker buildx for amd64/arm64 support -- **Distroless**: Use `python -m venv --copies` (not symlinks) since distroless has no system Python -- **Bun for JS builds**: Install `COPY --from=oven/bun:latest /usr/local/bin/bun /usr/local/bin/bun` for frontend asset builds - -## Official References - -- -- -- -- diff --git a/plugins/flow/skills/cloud-run/references/gpu.md b/plugins/flow/skills/cloud-run/references/gpu.md deleted file mode 100644 index fee8f74..0000000 --- a/plugins/flow/skills/cloud-run/references/gpu.md +++ /dev/null @@ -1,131 +0,0 @@ -# Cloud Run GPU Support - -## Overview - -Cloud Run supports GPU acceleration for ML inference workloads. GPUs are attached to instances on a per-service or per-job basis. 
- -## Available GPU Types - -### NVIDIA L4 - -```bash -gcloud run deploy SERVICE \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi \ - --region=us-central1 -``` - -- Minimum: 4 CPU, 16 GiB memory -- Recommended: 8 CPU, 32 GiB memory -- Driver: NVIDIA 535.x (pre-installed) -- VRAM: 24 GB - -### NVIDIA RTX PRO 6000 Blackwell (Preview) - -```bash -gcloud run deploy SERVICE \ - --gpu=1 \ - --gpu-type=nvidia-rtx-pro-6000-blackwell \ - --cpu=20 \ - --memory=80Gi \ - --region=us-central1 -``` - -- Minimum: 20 CPU, 80 GiB memory -- Driver: NVIDIA 580.x (pre-installed) -- VRAM: 96 GB - -## Driver and Library Setup - -Drivers are pre-installed — no custom driver installation needed in your Dockerfile. - -- `LD_LIBRARY_PATH` is automatically configured -- NVIDIA libraries available at `/usr/local/nvidia/lib64` -- CUDA toolkit accessible without additional setup - -## ML Inference Best Practices - -### Model Storage - -- **Download models from GCS, not baked into the image** for models >10 GB - - Use `gsutil cp` or the GCS FUSE volume mount at startup - - Keeps image size manageable and model updates independent of deployments -- Use **GGUF format** for fast loading with llama.cpp-based frameworks -- **Pre-quantize to 4-bit** (Q4_K_M recommended) for better memory efficiency and concurrency - -### Warm-Up - -- Pre-warm LLM caches at container startup before serving traffic -- Use startup probes (see `volumes.md`) to hold traffic until the model is loaded -- Consider semantic caching (e.g., Redis or in-memory LRU) for repeated queries - -### Serving Frameworks - -| Framework | Best For | Notes | -|-----------|----------|-------| -| **vLLM** | Production serving | PagedAttention, continuous batching, OpenAI-compatible API | -| **Ollama** | Simple serving | Easy model management, good for dev/staging | -| **llama.cpp server** | CPU/GPU hybrid | GGUF-native, low overhead | - -### Example: vLLM Deployment - -```bash -gcloud run deploy llm-service \ - 
--image=gcr.io/my-project/vllm-app:latest \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi \ - --concurrency=4 \ - --min-instances=1 \ - --max-instances=3 \ - --region=us-central1 \ - --service-account=llm-sa@my-project.iam.gserviceaccount.com -``` - -## Concurrency Tuning - -GPU workloads require explicit concurrency settings — Cloud Run cannot auto-scale based on GPU utilization. - -**Formula:** - -```text ---concurrency = (model_instances × parallel_queries) + (model_instances × batch_size) -``` - -**Guidance:** - -- Set `--concurrency` explicitly — the default (80) is too high for GPU workloads -- For single-model, non-batched inference: start with `--concurrency=1` to `--concurrency=4` -- For batched inference (vLLM continuous batching): concurrency can be higher (8–16) -- Test under load and monitor GPU memory utilization (`nvidia-smi`) - -## GPU for Cloud Run Jobs - -GPU flags work identically for Jobs. Add the zonal redundancy annotation to ensure scheduling: - -```bash -gcloud run jobs deploy gpu-batch-job \ - --image=IMAGE_URL \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi \ - --parallelism=2 \ - --tasks=10 \ - --region=us-central1 - -# Required annotation for GPU jobs -gcloud run jobs update gpu-batch-job \ - --update-annotations=run.googleapis.com/gpu-zonal-redundancy-disabled=true \ - --region=us-central1 -``` - -**Quota planning:** GPU quota must cover `GPUs × parallelism`. For 2 parallel tasks with 1 GPU each, you need quota for 2 GPUs. - -## References - -- -- diff --git a/plugins/flow/skills/cloud-run/references/iap.md b/plugins/flow/skills/cloud-run/references/iap.md deleted file mode 100644 index b7bfd21..0000000 --- a/plugins/flow/skills/cloud-run/references/iap.md +++ /dev/null @@ -1,240 +0,0 @@ -# Identity-Aware Proxy (IAP) for Cloud Run - -## Overview - -Identity-Aware Proxy provides zero-trust access control for Cloud Run services. 
Users authenticate via their Google identity, and IAP forwards a signed JWT to the backend in the `X-Goog-IAP-JWT-Assertion` header. No login screen is needed -- users are authenticated seamlessly through Google Workspace. - -- Official docs: - -## Authentication Flow - -1. User requests the Cloud Run service URL. -2. IAP checks Google identity (redirects to login if unauthenticated). -3. IAP verifies the user has the `roles/iap.httpsResourceAccessor` role. -4. IAP forwards the request with the `X-Goog-IAP-JWT-Assertion` header. -5. The service validates the JWT signature against Google's JWKS endpoint. -6. The service extracts the user's email from the JWT claims. -7. The service looks up or auto-provisions the user in the database. - -## IAP JWT Token - -IAP sends a signed JWT (ES256 algorithm) in the `X-Goog-IAP-JWT-Assertion` header containing: - -| Claim | Description | -|---------|--------------------------------------------------| -| `sub` | Stable user identifier | -| `email` | User's email address | -| `azp` | Authorized party (OAuth client ID) | -| `aud` | Expected audience (Cloud Run service path) | -| `iss` | Issuer: `https://cloud.google.com/iap` | -| `exp` | Token expiration | -| `iat` | Token issued at | - -Validate the JWT against Google's public JWKS keys at `https://www.gstatic.com/iap/verify/public_key-jwk`. Required validation checks: `exp`, `iat`, `aud`, `sub`, and `iss`. 
- -## Environment Variables - -| Variable | Example Value | Description | -|---------------------------|----------------------------------------------------------------|---------------------------------------------| -| `AUTH_IAP_ENABLED` | `true` | Enable IAP authentication | -| `IAP_AUDIENCE` | `/projects/{PROJECT_NUM}/locations/{REGION}/services/{SERVICE}` | Auto-computed audience for JWT validation | -| `AUTH_IAP_AUTO_PROVISION` | `true` | Auto-create users from IAP claims | -| `AUTH_LOCAL_ENABLED` | `false` | Disable local email/password login | -| `IAP_ALLOWED_DOMAINS` | `example.com,corp.example.com` | Domain allowlist for auto-provisioning | -| `IAP_CLOCK_SKEW_SECONDS` | `30` | Clock skew tolerance for JWT validation | - -When `AUTH_IAP_ENABLED=true`, `IAP_AUDIENCE` is required. When both IAP and local auth are enabled a warning is emitted -- disable local auth in production to prevent bypass. - -## User Auto-Provisioning - -When `AUTH_IAP_AUTO_PROVISION=true`, first-time IAP users are created automatically: - -1. JWT signature is validated against Google's public keys. -2. Email is extracted from the `email` claim. -3. If no matching user exists, a new account is created: - - Email: from IAP token - - Name: derived from email (prefix before `@`) - - Verified: `true` (IAP handles verification) - - Password: random (not used with IAP) - -Use `IAP_ALLOWED_DOMAINS` to restrict which email domains can auto-provision. Without an allowlist, any authenticated Google account can create an account. - -## Enabling IAP on Cloud Run - -### Deploy with IAP enabled - -```bash -gcloud run deploy SERVICE_NAME \ - --image=IMAGE_URL \ - --region=REGION \ - --project=PROJECT_ID \ - --ingress=all \ - --no-allow-unauthenticated \ - --iap -``` - -The `--iap` flag enables IAP on the Cloud Run service. Combined with `--no-allow-unauthenticated`, all traffic must pass through IAP. 
- -### Enable the IAP API and service agent - -```bash -# Enable IAP API -gcloud services enable iap.googleapis.com --project=PROJECT_ID - -# Create IAP service identity -gcloud beta services identity create \ - --service=iap.googleapis.com \ - --project=PROJECT_ID -``` - -### Configure OAuth brand (consent screen) - -```bash -# List existing brands -gcloud alpha iap oauth-brands list --project=PROJECT_ID --format=json - -# Create brand if needed (Internal type for Workspace orgs) -gcloud alpha iap oauth-brands create \ - --project=PROJECT_ID \ - --application_title="My App" \ - --support_email="admin@example.com" -``` - -### Grant IAP service account run.invoker - -The IAP service agent needs `roles/run.invoker` to forward requests to Cloud Run: - -```bash -PROJECT_NUMBER=$(gcloud projects describe PROJECT_ID --format='value(projectNumber)') -IAP_SA="service-${PROJECT_NUMBER}@gcp-sa-iap.iam.gserviceaccount.com" - -gcloud run services add-iam-policy-binding SERVICE_NAME \ - --region=REGION \ - --member="serviceAccount:${IAP_SA}" \ - --role="roles/run.invoker" -``` - -### Grant user/group access - -```bash -# Grant access to a Google Group -gcloud beta iap web add-iam-policy-binding \ - --project=PROJECT_ID \ - --resource-type=cloud-run \ - --service=SERVICE_NAME \ - --region=REGION \ - --member="group:team@example.com" \ - --role="roles/iap.httpsResourceAccessor" - -# Grant access to an individual user -gcloud beta iap web add-iam-policy-binding \ - --project=PROJECT_ID \ - --resource-type=cloud-run \ - --service=SERVICE_NAME \ - --region=REGION \ - --member="user:alice@example.com" \ - --role="roles/iap.httpsResourceAccessor" - -# Grant access to a service account (e.g., for Looker or programmatic access) -gcloud beta iap web add-iam-policy-binding \ - --project=PROJECT_ID \ - --resource-type=cloud-run \ - --service=SERVICE_NAME \ - --region=REGION \ - --member="serviceAccount:sa@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/iap.httpsResourceAccessor" -``` 
- -## Terraform Configuration - -```hcl -# Enable IAP API -resource "google_project_service" "iap" { - project = var.project_id - service = "iap.googleapis.com" -} - -# Cloud Run service with IAP -resource "google_cloud_run_v2_service" "app" { - name = "my-service" - location = var.region - project = var.project_id - - template { - containers { - image = var.image - - env { - name = "AUTH_IAP_ENABLED" - value = "true" - } - env { - name = "IAP_AUDIENCE" - value = "/projects/${data.google_project.project.number}/locations/${var.region}/services/my-service" - } - env { - name = "AUTH_IAP_AUTO_PROVISION" - value = "true" - } - env { - name = "AUTH_LOCAL_ENABLED" - value = "false" - } - } - } -} - -# IAP configuration for the Cloud Run service -resource "google_iap_web_type_compute_iam_member" "iap_access" { - project = var.project_id - role = "roles/iap.httpsResourceAccessor" - member = "group:team@example.com" -} - -# Grant IAP service agent run.invoker -resource "google_cloud_run_service_iam_member" "iap_invoker" { - project = var.project_id - location = var.region - service = google_cloud_run_v2_service.app.name - role = "roles/run.invoker" - member = "serviceAccount:service-${data.google_project.project.number}@gcp-sa-iap.iam.gserviceaccount.com" -} - -# OAuth brand (consent screen) -resource "google_iap_brand" "project_brand" { - project = var.project_id - support_email = var.support_email - application_title = "My App" -} -``` - -## Firewall Rules for IAP TCP Tunneling - -For bastion hosts or debugging via IAP TCP tunneling, allow the IAP IP range: - -```bash -gcloud compute firewall-rules create allow-iap-tcp \ - --project=PROJECT_ID \ - --network=VPC_NAME \ - --rules=tcp:22,tcp:5432,tcp:8888 \ - --source-ranges=35.235.240.0/20 \ - --description="Allow IAP TCP tunneling" \ - --direction=INGRESS \ - --action=ALLOW \ - --priority=1000 \ - --target-tags=bastion -``` - -Connect to a bastion through IAP: - -```bash -gcloud compute ssh BASTION_NAME --zone=ZONE 
--tunnel-through-iap -``` - -## Security Considerations - -- **Zero Trust**: Users must authenticate with Google AND have explicit IAP access via `roles/iap.httpsResourceAccessor`. -- **Disable local auth in production**: When IAP is enabled, set `AUTH_LOCAL_ENABLED=false` to prevent credential-based bypass. -- **Domain allowlist**: Use `IAP_ALLOWED_DOMAINS` to restrict auto-provisioning to specific email domains. -- **Audit logging**: All IAP access is logged in Cloud Logging. -- **Clock skew**: JWT validation allows configurable clock skew (default 30 seconds) for distributed systems. diff --git a/plugins/flow/skills/cloud-run/references/jobs.md b/plugins/flow/skills/cloud-run/references/jobs.md deleted file mode 100644 index 593e97c..0000000 --- a/plugins/flow/skills/cloud-run/references/jobs.md +++ /dev/null @@ -1,164 +0,0 @@ -# Cloud Run Jobs - -## Overview - -Cloud Run Jobs are designed for batch and scheduled tasks that run to completion, as opposed to Services which handle HTTP requests. Jobs support up to 24-hour timeouts and are billed per-execution. - -## CLI Commands - -```bash -# Deploy job -gcloud run jobs deploy JOB \ - --image=IMAGE_URL \ - --region=REGION \ - --tasks=10 \ - --parallelism=5 \ - --task-timeout=3600 - -# Execute job -gcloud run jobs execute JOB --region=REGION - -# List job executions -gcloud run jobs executions list --job=JOB -``` - -## Services vs Jobs Comparison - -| Feature | Services | Jobs | -|---------|----------|------| -| Purpose | HTTP request handling | Batch/scheduled tasks | -| Scaling | Auto-scales with traffic | Runs to completion | -| Billing | Per-request CPU time | Per-execution | -| Timeout | Up to 60 minutes | Up to 24 hours | -| Command | `gcloud run deploy` | `gcloud run jobs deploy` | - ---- - -## Task Parallelism - -Cloud Run Jobs support up to **10,000 parallel tasks** per execution. 
- -```bash -gcloud run jobs deploy parallel-job \ - --image=IMAGE_URL \ - --tasks=1000 \ - --parallelism=50 \ - --task-timeout=3600 \ - --region=us-central1 -``` - -### Work Distribution with Environment Variables - -Each task receives injected environment variables for partitioning work: - -| Variable | Description | -|----------|-------------| -| `CLOUD_RUN_TASK_INDEX` | Zero-based index of this task (0 to tasks-1) | -| `CLOUD_RUN_TASK_COUNT` | Total number of tasks in the execution | - -Example usage in Python: - -```python -import os - -task_index = int(os.environ["CLOUD_RUN_TASK_INDEX"]) -task_count = int(os.environ["CLOUD_RUN_TASK_COUNT"]) - -# Process this task's slice of work -items = all_items[task_index::task_count] -``` - -### Timeouts and Retries - -| Setting | Default | Maximum | Flag | -|---------|---------|---------|------| -| Task timeout | 10 minutes | 168 hours (7 days) | `--task-timeout` | -| Max retries per task | 3 | N | `--max-retries=N` | - -```bash -# Long-running job with custom retry policy -gcloud run jobs deploy long-job \ - --image=IMAGE_URL \ - --task-timeout=86400 \ - --max-retries=1 \ - --region=us-central1 -``` - ---- - -## Scheduled Execution (Cloud Scheduler) - -Trigger Cloud Run Jobs on a cron schedule via Cloud Scheduler. 
- -### Setup - -```bash -# Create the scheduled job trigger -gcloud scheduler jobs create http my-job-schedule \ - --location=us-central1 \ - --schedule="0 2 * * *" \ - --uri="https://run.googleapis.com/v2/projects/PROJECT/locations/REGION/jobs/JOB:run" \ - --http-method=POST \ - --oauth-service-account-email=scheduler-sa@PROJECT.iam.gserviceaccount.com \ - --message-body='{}' - -# Run manually to test -gcloud scheduler jobs run my-job-schedule --location=us-central1 -``` - -### URI Pattern - -```text -https://run.googleapis.com/v2/projects/PROJECT/locations/REGION/jobs/JOB:run -``` - -### Required IAM Roles for Scheduler SA - -| Role | Purpose | -|------|---------| -| `roles/cloudscheduler.admin` | Manage Cloud Scheduler jobs | -| `roles/run.invoker` | Trigger Cloud Run Job executions | - -```bash -# Grant roles to the scheduler service account -gcloud projects add-iam-policy-binding PROJECT \ - --member=serviceAccount:scheduler-sa@PROJECT.iam.gserviceaccount.com \ - --role=roles/run.invoker -``` - -### Common Cron Schedules - -| Schedule | Meaning | -|----------|---------| -| `0 2 * * *` | Daily at 2:00 AM | -| `0 */6 * * *` | Every 6 hours | -| `0 9 * * 1` | Every Monday at 9:00 AM | -| `*/15 * * * *` | Every 15 minutes | - ---- - -## GPU for Jobs - -GPU flags work identically for Jobs as for Services. The `gpu-zonal-redundancy-disabled` annotation is required to ensure scheduling. - -```bash -# Deploy GPU job -gcloud run jobs deploy gpu-batch-job \ - --image=IMAGE_URL \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi \ - --tasks=10 \ - --parallelism=2 \ - --region=us-central1 - -# Add required annotation for GPU jobs -gcloud run jobs update gpu-batch-job \ - --update-annotations=run.googleapis.com/gpu-zonal-redundancy-disabled=true \ - --region=us-central1 -``` - -**Quota planning:** GPU quota must cover `GPUs × parallelism`. For 2 parallel tasks with 1 GPU each, you need quota for 2 L4 GPUs in the region. 
- -See [GPU reference](gpu.md) for GPU types, driver details, and ML inference patterns. diff --git a/plugins/flow/skills/cloud-run/references/networking.md b/plugins/flow/skills/cloud-run/references/networking.md deleted file mode 100644 index 742cd60..0000000 --- a/plugins/flow/skills/cloud-run/references/networking.md +++ /dev/null @@ -1,86 +0,0 @@ -# Cloud Run Networking - -## Multi-Container Services - -Cloud Run supports sidecar containers for proxies, logging, etc. - -```yaml -apiVersion: serving.knative.dev/v1 -kind: Service -metadata: - name: multi-container-service - annotations: - run.googleapis.com/launch-stage: BETA -spec: - template: - metadata: - annotations: - # Container startup ordering - run.googleapis.com/container-dependencies: "{nginx: [app]}" - spec: - containers: - # Ingress container (receives traffic) - - name: nginx - image: nginx - ports: - - containerPort: 8080 - resources: - limits: - cpu: 500m - memory: 256Mi - startupProbe: - tcpSocket: - port: 8080 - timeoutSeconds: 240 - - # Sidecar container - - name: app - image: my-app:latest - env: - - name: PORT - value: "8888" - resources: - limits: - cpu: 1000m - memory: 512Mi -``` - -## Ingress Configuration - -```bash -# Internal only (VPC) -gcloud run deploy SERVICE --ingress=internal - -# Internal + Cloud Load Balancing -gcloud run deploy SERVICE --ingress=internal-and-cloud-load-balancing - -# All traffic (public) -gcloud run deploy SERVICE --ingress=all -``` - -## VPC Connector - -```bash -# Create connector -gcloud compute networks vpc-access connectors create CONNECTOR \ - --region=REGION \ - --network=VPC_NETWORK \ - --range=10.8.0.0/28 - -# Deploy with connector -gcloud run deploy SERVICE \ - --vpc-connector=CONNECTOR \ - --vpc-egress=all-traffic -``` - -## Secrets Management - -```bash -# Use Secret Manager secret as env var -gcloud run deploy SERVICE \ - --set-secrets="DB_PASSWORD=db-password:latest" - -# Mount secret as file -gcloud run deploy SERVICE \ - 
--set-secrets="/secrets/config.json=app-config:latest" -``` diff --git a/plugins/flow/skills/cloud-run/references/performance.md b/plugins/flow/skills/cloud-run/references/performance.md deleted file mode 100644 index 6812869..0000000 --- a/plugins/flow/skills/cloud-run/references/performance.md +++ /dev/null @@ -1,110 +0,0 @@ -# Cloud Run Performance - -## Cold Start Optimization - -### Strategies - -1. **Minimum Instances**: Keep containers warm - -```bash -gcloud run deploy SERVICE --min-instances=1 -``` - -2. **Startup CPU Boost**: Temporarily increase CPU during startup - -```bash -gcloud run deploy SERVICE --cpu-boost -``` - -3. **Application Optimization**: - - Use minimal base images (Alpine, Distroless) - - Lazy-load heavy dependencies - - Defer non-critical initialization - - Move heavy operations to background threads - -4. **Image Optimization**: - - Image size doesn't affect cold start directly - - Focus on reducing initialization complexity - - Pre-compile bytecode (Python: `--compile-bytecode`) - -## Resource Configuration - -### CPU and Memory - -```bash -# CPU options: 1, 2, 4, 6, 8 vCPUs -gcloud run deploy SERVICE --cpu=2 - -# Memory: 128Mi to 32Gi -gcloud run deploy SERVICE --memory=2Gi - -# Combined -gcloud run deploy SERVICE --cpu=2 --memory=4Gi -``` - -### Memory Formula - -```text -Peak Memory = Standing Memory + (Memory per Request × Concurrency) -``` - -### GPU Support (Preview) - -```bash -gcloud run deploy SERVICE \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi -``` - -## Concurrency Tuning - -### Understanding Concurrency - -- **Default**: 80 concurrent requests per instance -- **Maximum**: 1000 concurrent requests per instance -- **Minimum**: 1 (single-threaded apps) - -### Tuning Guidelines - -| Workload Type | Recommended Concurrency | -|---------------|------------------------| -| I/O-bound async | 80-1000 | -| CPU-intensive | 1-10 | -| Memory-intensive | 10-20 | -| Mixed workloads | 20-50 | - -```bash -# Set 
concurrency -gcloud run deploy SERVICE --concurrency=80 - -# Single-threaded mode -gcloud run deploy SERVICE --concurrency=1 -``` - -### Language-Specific Notes - -**Python**: Set `THREADS` environment variable equal to concurrency - -```bash -gcloud run deploy SERVICE --set-env-vars="THREADS=80" --concurrency=80 -``` - -**Node.js**: Use async patterns; single-threaded but handles concurrent I/O well - -## Best Practices - -### Cost Optimization - -1. **Use appropriate concurrency** - Higher concurrency = fewer instances = lower cost -2. **Set min-instances wisely** - Balance cold starts vs always-on cost -3. **Use request-based CPU** unless you need background processing -4. **Right-size CPU/memory** - Don't over-provision - -### Performance - -1. **Enable startup CPU boost** for faster cold starts -2. **Use health probes** to ensure readiness before receiving traffic -3. **Optimize container startup** - lazy load, async init -4. **Use regional deployments** close to users diff --git a/plugins/flow/skills/cloud-run/references/services.md b/plugins/flow/skills/cloud-run/references/services.md deleted file mode 100644 index 4df6fbf..0000000 --- a/plugins/flow/skills/cloud-run/references/services.md +++ /dev/null @@ -1,184 +0,0 @@ -# Cloud Run Services - -## Key Concepts - -### Services vs Jobs - -| Feature | Services | Jobs | -|---------|----------|------| -| Purpose | HTTP request handling | Batch/scheduled tasks | -| Scaling | Auto-scales with traffic | Runs to completion | -| Billing | Per-request CPU time | Per-execution | -| Timeout | Up to 60 minutes | Up to 24 hours | -| Command | `gcloud run deploy` | `gcloud run jobs deploy` | - -### CPU Allocation Modes - -1. **Request-based (default)**: CPU only allocated during request processing - - Best for: Cost optimization, sporadic traffic - - Limitation: No background processing between requests - -2. 
**Always-allocated**: CPU allocated for entire container lifetime - - Best for: WebSockets, background tasks, streaming - - Cost: Higher, but enables more use cases - -```bash -# Always-allocated CPU -gcloud run deploy SERVICE --cpu-throttling=false - -# Request-based (default) -gcloud run deploy SERVICE --cpu-throttling=true -``` - -## CLI Commands - -### Deployment - -```bash -# Basic deploy -gcloud run deploy SERVICE \ - --image=IMAGE_URL \ - --region=REGION \ - --platform=managed - -# Full deployment with common options -gcloud run deploy SERVICE \ - --image=gcr.io/PROJECT/IMAGE:TAG \ - --region=us-central1 \ - --cpu=2 \ - --memory=2Gi \ - --concurrency=80 \ - --min-instances=1 \ - --max-instances=100 \ - --timeout=300 \ - --set-env-vars="KEY1=VALUE1,KEY2=VALUE2" \ - --service-account=SA@PROJECT.iam.gserviceaccount.com \ - --allow-unauthenticated - -# Deploy without traffic (for testing) -gcloud run deploy SERVICE --image=IMAGE_URL --no-traffic --tag=preview -``` - -### Traffic Management - -```bash -# Send all traffic to latest -gcloud run services update-traffic SERVICE --to-latest - -# Split traffic between revisions -gcloud run services update-traffic SERVICE \ - --to-revisions=REVISION1=70,REVISION2=30 - -# Gradual rollout (10% to latest) -gcloud run services update-traffic SERVICE \ - --to-revisions=LATEST=10 - -# Tag-based routing -gcloud run services update-traffic SERVICE \ - --to-tags=canary=10 - -# Rollback to specific revision -gcloud run services update-traffic SERVICE \ - --to-revisions=REVISION_NAME=100 -``` - -### Revision Management - -```bash -# List revisions -gcloud run revisions list --service=SERVICE - -# Describe revision -gcloud run revisions describe REVISION - -# Set tags on revisions -gcloud run services update-traffic SERVICE \ - --set-tags=stable=REVISION1,canary=REVISION2 - -# Delete old revisions -gcloud run revisions delete REVISION -``` - -### Service Management - -```bash -# List services -gcloud run services list - -# Describe 
service -gcloud run services describe SERVICE --region=REGION - -# Delete service -gcloud run services delete SERVICE --region=REGION - -# Update service -gcloud run services update SERVICE \ - --update-env-vars="KEY=VALUE" \ - --region=REGION -``` - -## Concurrency Configuration - -### Understanding Concurrency - -- **Default**: 80 concurrent requests per instance -- **Maximum**: 1000 concurrent requests per instance -- **Minimum**: 1 (single-threaded apps) - -### Tuning Guidelines - -| Workload Type | Recommended Concurrency | -|---------------|------------------------| -| I/O-bound async | 80-1000 | -| CPU-intensive | 1-10 | -| Memory-intensive | 10-20 | -| Mixed workloads | 20-50 | - -```bash -# Set concurrency -gcloud run deploy SERVICE --concurrency=80 - -# Single-threaded mode -gcloud run deploy SERVICE --concurrency=1 -``` - -### Language-Specific Notes - -**Python**: Set `THREADS` environment variable equal to concurrency - -```bash -gcloud run deploy SERVICE --set-env-vars="THREADS=80" --concurrency=80 -``` - -**Node.js**: Use async patterns; single-threaded but handles concurrent I/O well - -## Resource Configuration - -### CPU and Memory - -```bash -# CPU options: 1, 2, 4, 6, 8 vCPUs -gcloud run deploy SERVICE --cpu=2 - -# Memory: 128Mi to 32Gi -gcloud run deploy SERVICE --memory=2Gi - -# Combined -gcloud run deploy SERVICE --cpu=2 --memory=4Gi -``` - -### Memory Formula - -```text -Peak Memory = Standing Memory + (Memory per Request × Concurrency) -``` - -### GPU Support (Preview) - -```bash -gcloud run deploy SERVICE \ - --gpu=1 \ - --gpu-type=nvidia-l4 \ - --cpu=8 \ - --memory=32Gi -``` diff --git a/plugins/flow/skills/cloud-run/references/terraform.md b/plugins/flow/skills/cloud-run/references/terraform.md deleted file mode 100644 index 2391e2c..0000000 --- a/plugins/flow/skills/cloud-run/references/terraform.md +++ /dev/null @@ -1,107 +0,0 @@ -# Cloud Run Terraform Configuration - -## Basic Service - -```hcl -resource "google_cloud_run_v2_service" 
"default" { - name = "my-service" - location = "us-central1" - - template { - containers { - image = "gcr.io/my-project/my-image:latest" - - resources { - limits = { - cpu = "2" - memory = "2Gi" - } - } - - env { - name = "ENV" - value = "production" - } - - # Secret from Secret Manager - env { - name = "DB_PASSWORD" - value_source { - secret_key_ref { - secret = google_secret_manager_secret.db_password.secret_id - version = "latest" - } - } - } - - startup_probe { - http_get { - path = "/healthz" - } - initial_delay_seconds = 0 - timeout_seconds = 1 - period_seconds = 3 - failure_threshold = 3 - } - - liveness_probe { - http_get { - path = "/healthz" - } - period_seconds = 30 - } - } - - scaling { - min_instance_count = 1 - max_instance_count = 100 - } - - max_instance_request_concurrency = 80 - timeout = "300s" - service_account = google_service_account.run_sa.email - } - - traffic { - type = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST" - percent = 100 - } -} -``` - -## IAM Configuration - -```hcl -# Public access -resource "google_cloud_run_service_iam_member" "public" { - service = google_cloud_run_v2_service.default.name - location = google_cloud_run_v2_service.default.location - role = "roles/run.invoker" - member = "allUsers" -} - -# Authenticated only -resource "google_cloud_run_service_iam_member" "auth" { - service = google_cloud_run_v2_service.default.name - location = google_cloud_run_v2_service.default.location - role = "roles/run.invoker" - member = "serviceAccount:${var.invoker_sa}" -} -``` - -## Custom Domain - -```hcl -resource "google_cloud_run_domain_mapping" "default" { - location = "us-central1" - name = "api.example.com" - - metadata { - namespace = var.project_id - } - - spec { - route_name = google_cloud_run_v2_service.default.name - } -} -``` diff --git a/plugins/flow/skills/cloud-run/references/troubleshooting.md b/plugins/flow/skills/cloud-run/references/troubleshooting.md deleted file mode 100644 index e3fbb37..0000000 --- 
a/plugins/flow/skills/cloud-run/references/troubleshooting.md +++ /dev/null @@ -1,49 +0,0 @@ -# Cloud Run Troubleshooting - -## Container Fails to Start - -```bash -# Check logs -gcloud run services logs read SERVICE --region=REGION - -# Check revision status -gcloud run revisions describe REVISION --region=REGION -``` - -## High Latency - -1. Check cold start frequency (enable min-instances) -2. Review concurrency settings -3. Check for CPU throttling -4. Profile application startup - -## Memory Issues - -1. Increase memory limit -2. Check for memory leaks -3. Reduce concurrency -4. Review in-memory caching - -## Best Practices - -### Security - -1. **Use Workload Identity** instead of service account keys -2. **Store secrets in Secret Manager** -3. **Set appropriate ingress controls** -4. **Use VPC Connector for internal resources** -5. **Enable binary authorization** for trusted images only - -### Reliability - -1. **Set appropriate timeouts** for your workload -2. **Configure retries** for transient failures -3. **Use traffic splitting** for safe deployments -4. **Monitor with Cloud Monitoring** and set alerts - -## Resources - -- [Cloud Run Documentation](https://cloud.google.com/run/docs) -- [Cloud Run Pricing](https://cloud.google.com/run/pricing) -- [General Development Tips](https://cloud.google.com/run/docs/tips/general) -- [Cloud Run Samples](https://github.com/GoogleCloudPlatform/cloud-run-samples) diff --git a/plugins/flow/skills/cloud-run/references/volumes.md b/plugins/flow/skills/cloud-run/references/volumes.md deleted file mode 100644 index befa912..0000000 --- a/plugins/flow/skills/cloud-run/references/volumes.md +++ /dev/null @@ -1,201 +0,0 @@ -# Cloud Run Volumes and Health Checks - -## Cloud Storage FUSE - -Mount a GCS bucket as a filesystem volume in your Cloud Run service or job. Requires the **2nd-generation execution environment**. 
- -### Mount Command - -```bash -gcloud run deploy SERVICE \ - --add-volume=name=my-data,type=cloud-storage,bucket=MY_BUCKET \ - --add-volume-mount=volume=my-data,mount-path=/data \ - --execution-environment=gen2 \ - --region=us-central1 -``` - -### Volume Options - -| Option | Description | -|--------|-------------| -| `readonly` | Mount as read-only (recommended for model loading) | -| `stat-cache-max-size-mb=N` | Override stat cache size (default: 32 MB) | -| `only-dir=subpath` | Mount a subdirectory of the bucket instead of root | -| `uid=N` / `gid=N` | Set file ownership for non-root containers | -| `implicit-dirs` | Enable listing of directories not explicitly created | -| `log-severity=LEVEL` | FUSE log verbosity (e.g., `WARNING`, `ERROR`) | - -### Example with Options - -```bash -gcloud run deploy SERVICE \ - --add-volume=name=models,type=cloud-storage,bucket=MY_MODELS_BUCKET,readonly=true,implicit-dirs=true \ - --add-volume-mount=volume=models,mount-path=/models \ - --execution-environment=gen2 -``` - -### Caching Overhead - -Cloud Storage FUSE adds per-instance memory overhead: - -- Stat cache: ~32 MB (configurable via `stat-cache-max-size-mb`) -- Type cache: ~4 MB -- Total: ~36 MB baseline per instance - -### Limitations - -- **No file locking** — concurrent writes from multiple instances use last-write-wins semantics -- Not suitable for databases or lock-requiring workloads -- Eventual consistency on highly concurrent writes - -### Best Use Cases - -- Read-heavy workloads (ML model loading, static assets) -- Data processing where each task reads distinct files -- Sharing large artifacts across instances without baking into the image - ---- - -## NFS (Filestore) - -Mount a Filestore NFS share for workloads requiring POSIX-compatible shared storage. 
- -### Requirements - -- **VPC connectivity required** — Cloud Run service must have VPC access configured -- Use Direct VPC egress (`--vpc-egress=private-ranges-only`) for best performance - -### Mount Command - -```bash -gcloud run deploy SERVICE \ - --add-volume=name=shared,type=nfs,location=FILESTORE_IP,path=/share_name \ - --add-volume-mount=volume=shared,mount-path=/mnt/shared \ - --execution-environment=gen2 \ - --network=NETWORK \ - --subnet=SUBNET \ - --vpc-egress=private-ranges-only -``` - -### NFS Configuration Notes - -- Mount timeout: 30 seconds -- Uses `no-lock` mode — advisory locks are not supported -- For multiple NFS volumes, mounts are attempted in parallel to reduce startup time - ---- - -## Health Checks - -Health checks detect container startup completion, deadlocks, and resource exhaustion. - -### Startup Probes - -Startup probes delay traffic until the container signals readiness. Critical for GPU workloads loading large models. - -```yaml -# service.yaml — HTTP startup probe -apiVersion: serving.knative.dev/v1 -kind: Service -spec: - template: - spec: - containers: - - name: app - startupProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 0 - periodSeconds: 5 - failureThreshold: 60 # 5s × 60 = 5 min max wait - timeoutSeconds: 3 -``` - -```yaml -# TCP startup probe (for non-HTTP services) -startupProbe: - tcpSocket: - port: 8080 - initialDelaySeconds: 0 - periodSeconds: 5 - failureThreshold: 48 -``` - -```yaml -# gRPC startup probe -startupProbe: - grpc: - port: 50051 - periodSeconds: 5 - failureThreshold: 60 -``` - -**Parameter ranges:** - -| Parameter | Range | Notes | -|-----------|-------|-------| -| `initialDelaySeconds` | 0–240 | Seconds to wait before first probe | -| `periodSeconds` | 1–240 | Interval between probes | -| `failureThreshold` | 1–N | Failures before container restart | -| `timeoutSeconds` | 1–240 | Per-probe timeout | - -**GPU startup pattern** — set `failureThreshold` high to accommodate model 
loading: - -```bash -# Total startup budget = periodSeconds × failureThreshold -# For a 4-min model load: periodSeconds=5, failureThreshold=60 → 5 min budget -``` - -### Liveness Probes - -Liveness probes restart containers that have become deadlocked or exhausted. - -```yaml -livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 0 - periodSeconds: 10 # default: 10s - failureThreshold: 3 - timeoutSeconds: 4 -``` - -```yaml -# gRPC liveness probe -livenessProbe: - grpc: - port: 50051 - periodSeconds: 10 - failureThreshold: 3 -``` - -**When to use liveness probes:** - -- GPU/CPU-intensive inference services that can deadlock under OOM conditions -- Services with connection pools that can become exhausted -- Long-lived background threads that may silently fail - -**Note:** Liveness probes are not supported for startup — use startup probes for initial readiness. - -### Applying via gcloud - -```bash -gcloud run deploy SERVICE \ - --region=us-central1 \ - --startup-cpu-boost \ - --startup-probe-initial-delay=0 \ - --startup-probe-period=5 \ - --startup-probe-failure-threshold=60 \ - --startup-probe-path=/healthz \ - --liveness-probe-period=10 \ - --liveness-probe-failure-threshold=3 \ - --liveness-probe-path=/healthz -``` - -## References - -- -- -- diff --git a/plugins/flow/skills/cloud-sql/SKILL.md b/plugins/flow/skills/cloud-sql/SKILL.md deleted file mode 100644 index 10c783a..0000000 --- a/plugins/flow/skills/cloud-sql/SKILL.md +++ /dev/null @@ -1,302 +0,0 @@ ---- -name: cloud-sql -description: "Use when provisioning Google Cloud SQL, configuring Cloud SQL Auth Proxy, connection strings, read replicas, backups, PITR, private IP, database migrations, or managed PostgreSQL/MySQL/SQL Server on GCP." ---- - -# Cloud SQL - -## Overview - -Cloud SQL is Google Cloud's fully managed relational database service supporting PostgreSQL, MySQL, and SQL Server. 
It handles automated backups, replication, patching, high availability, and scaling — letting you focus on your application instead of database administration. - -## Quick Reference - -### Cloud SQL vs AlloyDB - -| Feature | Cloud SQL | AlloyDB | -|---|---|---| -| Engines | PostgreSQL, MySQL, SQL Server | PostgreSQL only | -| Storage | Attached SSD (up to 64 TB) | Disaggregated, log-based | -| Availability SLA | 99.95% (HA config) | 99.99% (regional) | -| Columnar engine | Not available | Built-in adaptive | -| ML embeddings | Manual setup | Native Vertex AI | -| Read scaling | Manual read replicas | Read pool (auto-managed) | -| Networking | Public IP or private IP | Private IP only (PSA required) | -| Cost | Lower entry cost | Higher, performance-optimized | -| Best for | General workloads, MySQL/SQL Server | High-performance PostgreSQL | - -### Instance Management - -| Action | Command | -|---|---| -| Create instance | `gcloud sql instances create NAME --database-version=POSTGRES_15 --tier=db-g1-small --region=REGION` | -| Clone instance | `gcloud sql instances clone SOURCE DEST` | -| Restart instance | `gcloud sql instances restart NAME` | -| Patch/resize | `gcloud sql instances patch NAME --tier=db-n1-standard-4` | -| Delete instance | `gcloud sql instances delete NAME` | -| Set maintenance window | `gcloud sql instances patch NAME --maintenance-window-day=SUN --maintenance-window-hour=3` | - -### Key Commands - -| Action | Command | -|---|---| -| Create database | `gcloud sql databases create DBNAME --instance=INSTANCE` | -| Create user | `gcloud sql users create USERNAME --instance=INSTANCE --password=PASS` | -| Connect via proxy | `cloud-sql-proxy PROJECT:REGION:INSTANCE` | -| Connect directly | `gcloud sql connect INSTANCE --user=postgres --database=DBNAME` | -| Create backup | `gcloud sql backups create --instance=INSTANCE` | -| List backups | `gcloud sql backups list --instance=INSTANCE` | -| Restore backup | `gcloud sql backups restore BACKUP_ID 
--restore-instance=INSTANCE` | - -### Connection Patterns Overview - -| Pattern | When to Use | -|---|---| -| **Auth Proxy** | Recommended default — handles IAM auth and TLS automatically | -| **Private IP** | GKE/GCE on same VPC — lowest latency, no proxy overhead | -| **PSC (Private Service Connect)** | Cross-project or cross-org access without VPC peering | -| **Public IP + authorized networks** | Legacy only — always enforce SSL, restrict to known CIDRs | - -```bash -# Enable required APIs -gcloud services enable sqladmin.googleapis.com -gcloud services enable sql-component.googleapis.com - -# Create a PostgreSQL instance with HA -gcloud sql instances create my-postgres \ - --database-version=POSTGRES_15 \ - --tier=db-n1-standard-4 \ - --region=us-central1 \ - --availability-type=REGIONAL \ - --storage-type=SSD \ - --storage-size=100GB \ - --storage-auto-increase \ - --backup-start-time=03:00 \ - --enable-bin-log \ - --maintenance-window-day=SUN \ - --maintenance-window-hour=4 \ - --no-assign-ip \ - --network=projects/MY_PROJECT/global/networks/MY_VPC - -# Connect via Auth Proxy -cloud-sql-proxy MY_PROJECT:us-central1:my-postgres --port=5432 & -psql "host=127.0.0.1 port=5432 dbname=mydb user=postgres" -``` - -### Engine-Specific Notes - -**PostgreSQL** — Use `POSTGRES_15` or `POSTGRES_16`. Supports pgvector, PostGIS, pg_stat_statements. Set `max_connections` conservatively; use PgBouncer for connection pooling. - -**MySQL** — Use `MYSQL_8_0`. InnoDB only. `innodb_buffer_pool_size` defaults to 75% of instance RAM. Binary logging required for read replicas. - -**SQL Server** — Use `SQLSERVER_2022_STANDARD` or `ENTERPRISE`. Always-on availability groups supported. Windows Authentication not available; use SQL Server auth or IAM. 
- -### Backup and Restore - -```bash -# Enable automated backups with PITR -gcloud sql instances patch my-postgres \ - --backup-start-time=03:00 \ - --enable-bin-log \ - --retained-backups-count=14 \ - --retained-transaction-log-days=7 - -# On-demand backup -gcloud sql backups create --instance=my-postgres --description="pre-migration" - -# Point-in-time restore (PostgreSQL/MySQL) -gcloud sql instances clone my-postgres my-postgres-restored \ - --point-in-time="2025-06-15T14:30:00Z" - -# Cross-region replica for disaster recovery -gcloud sql instances create my-postgres-replica \ - --master-instance-name=my-postgres \ - --region=us-east1 -``` - -### Replication - -```bash -# Create read replica (same region) -gcloud sql instances create my-postgres-read \ - --master-instance-name=my-postgres \ - --region=us-central1 - -# Promote replica to standalone (for migrations) -gcloud sql instances promote-replica my-postgres-read - -# List replicas -gcloud sql instances list --filter="masterInstanceName=my-postgres" -``` - -### Security - -```bash -# Enable IAM database authentication -gcloud sql instances patch my-postgres \ - --database-flags=cloudsql.iam_authentication=on - -# Add IAM user (PostgreSQL) -gcloud sql users create user@example.com \ - --instance=my-postgres \ - --type=CLOUD_IAM_USER - -# Enforce SSL -gcloud sql instances patch my-postgres \ - --require-ssl - -# Enable audit logging -gcloud sql instances patch my-postgres \ - --database-flags=cloudsql.enable_pgaudit=on -``` - - - -## Workflow - -### Step 1: Plan Instance Configuration - -Choose engine version, tier (machine type), and storage based on workload. For production, always use `--availability-type=REGIONAL` for HA with automatic failover. Size memory to fit the working dataset with ~30% headroom. - -### Step 2: Configure Networking - -Prefer private IP over public IP. If using private IP, ensure a VPC exists and pass `--network=` and `--no-assign-ip` at creation time. 
Private IP cannot be added after creation without recreation. For cross-project access, use PSC instead of VPC peering. - -### Step 3: Create Instance and Database Objects - -Create the instance, then create databases and users. Use IAM database authentication over password auth when possible. Store passwords in Secret Manager. - -### Step 4: Set Up Auth Proxy for Application Connections - -Deploy the Cloud SQL Auth Proxy as a sidecar (GKE), standalone binary (GCE), or let Cloud Run handle it automatically with `--add-cloudsql-instances`. The proxy handles TLS and IAM authentication transparently. - -### Step 5: Configure Backups and Monitoring - -Enable automated backups, set PITR retention, and configure maintenance windows during off-peak hours. Enable Query Insights for performance monitoring. Set up alerts for disk usage, CPU, and active connections. - -### Step 6: (Optional) Add Read Replicas - -For read-heavy workloads, create read replicas and update application connection strings to route read queries to replicas. For PostgreSQL, consider PgBouncer as a connection pool in front of both primary and replicas. 
- - - - - -## Guardrails - -- **Never expose public IP without authorized networks and SSL** — use Auth Proxy or private IP; if public IP is required, set `--require-ssl` and restrict `--authorized-networks` to known CIDRs -- **Always enable automated backups and PITR** — set `--backup-start-time` and `--retained-backups-count` at creation; enabling after the fact risks a gap -- **Set maintenance windows to off-peak hours** — patch windows cause brief downtime on non-HA instances; set `--maintenance-window-day` and `--maintenance-window-hour` -- **Prefer IAM database authentication** over password auth for GCP service accounts and human users; passwords must still be rotated for legacy drivers -- **Size for peak + 30% headroom** — Cloud SQL scales storage automatically but compute requires a patch operation with brief restart -- **Use read replicas for read-heavy workloads** — replicas are not a substitute for connection pooling; address both separately -- **Enable Query Insights** — critical for diagnosing slow queries; off by default on older instances -- **Private IP cannot be added post-creation** — decide at instance creation time; recreation is required to switch - - - - - -### Validation Checkpoint - -Before delivering configurations, verify: - -- [ ] Instance uses `--availability-type=REGIONAL` for production HA -- [ ] Private IP is configured (`--no-assign-ip --network=`) or Auth Proxy is in place -- [ ] Automated backups and PITR are enabled with appropriate retention -- [ ] Maintenance window is set to off-peak hours -- [ ] SSL is enforced (`--require-ssl`) if public IP exists -- [ ] Passwords are stored in Secret Manager, not hardcoded -- [ ] Storage auto-increase is enabled (`--storage-auto-increase`) - - - - - -## Example - -Create a PostgreSQL 15 instance with HA, configure Auth Proxy, and connect a Python application: - -```bash -# 1. 
Create instance -gcloud sql instances create app-postgres \ - --database-version=POSTGRES_15 \ - --tier=db-n1-standard-2 \ - --region=us-central1 \ - --availability-type=REGIONAL \ - --storage-type=SSD \ - --storage-size=50GB \ - --storage-auto-increase \ - --no-assign-ip \ - --network=projects/my-project/global/networks/my-vpc \ - --backup-start-time=02:00 \ - --retained-backups-count=14 \ - --enable-bin-log \ - --retained-transaction-log-days=7 \ - --maintenance-window-day=SAT \ - --maintenance-window-hour=3 \ - --database-flags=cloudsql.iam_authentication=on - -# 2. Create database and user -gcloud sql databases create myapp --instance=app-postgres -gcloud sql users create myapp-user \ - --instance=app-postgres \ - --password="$(gcloud secrets versions access latest --secret=db-password)" - -# 3. Grant IAM access for a service account -gcloud sql users create sa@my-project.iam \ - --instance=app-postgres \ - --type=CLOUD_IAM_SERVICE_ACCOUNT - -# 4. Start Auth Proxy (local development) -cloud-sql-proxy my-project:us-central1:app-postgres --port=5432 & -``` - -Python connection string using the Auth Proxy (local) or Unix socket (Cloud Run): - -```python -# Via Auth Proxy (local dev / GCE) -DATABASE_URL = "postgresql+asyncpg://myapp-user:password@127.0.0.1:5432/myapp" - -# Via Unix socket (Cloud Run — set INSTANCE_CONNECTION_NAME env var) -import os -INSTANCE = os.environ["INSTANCE_CONNECTION_NAME"] # project:region:instance -DATABASE_URL = f"postgresql+asyncpg://myapp-user:password@/myapp?host=/cloudsql/{INSTANCE}" -``` - - - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Connection Patterns](references/connections.md)** - - GKE sidecar, Cloud Run, Compute Engine, local development, PSC, connection strings, pooling. -- **[Engine-Specific Tuning](references/engines.md)** - - PostgreSQL flags and extensions, MySQL InnoDB tuning, SQL Server settings, migration paths. 
- ---- - -## Cross-References - -- **Gemini CLI extensions**: `gemini extensions install https://github.com/gemini-cli-extensions/cloud-sql-postgresql` (also `cloud-sql-mysql`, `cloud-sql-sqlserver`) -- **Higher performance PostgreSQL**: see `flow:alloydb` -- **GKE deployment patterns**: see `flow:gke` → Cloud SQL on GKE section - ---- - -## Official References - -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [GCP Scripting](https://github.com/cofin/flow/blob/main/templates/styleguides/cloud/gcp_scripting.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/cloud-sql/agents/openai.yaml b/plugins/flow/skills/cloud-sql/agents/openai.yaml deleted file mode 100644 index 059e2ae..0000000 --- a/plugins/flow/skills/cloud-sql/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Cloud SQL" - short_description: "Google Cloud SQL instances, Auth Proxy, replicas, backups, and migrations" diff --git a/plugins/flow/skills/cloud-sql/references/connections.md b/plugins/flow/skills/cloud-sql/references/connections.md deleted file mode 100644 index 3fc9ebd..0000000 --- a/plugins/flow/skills/cloud-sql/references/connections.md +++ /dev/null @@ -1,317 +0,0 @@ -# Cloud SQL Connection Patterns - -## GKE: Auth Proxy Sidecar - -The recommended pattern for GKE workloads is to run the Cloud SQL Auth Proxy as a sidecar container. The application connects to `127.0.0.1` as if the database were local. 
- -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: my-app -spec: - selector: - matchLabels: - app: my-app - template: - metadata: - labels: - app: my-app - spec: - serviceAccountName: my-app-ksa # KSA bound to GSA via Workload Identity - containers: - - name: app - image: gcr.io/my-project/my-app:latest - env: - - name: DB_HOST - value: "127.0.0.1" - - name: DB_PORT - value: "5432" - - name: DB_NAME - value: "myapp" - envFrom: - - secretRef: - name: db-credentials - - - name: cloud-sql-proxy - image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2 - args: - - "--structured-logs" - - "--port=5432" - - "MY_PROJECT:us-central1:my-postgres" - securityContext: - runAsNonRoot: true - resources: - requests: - memory: "32Mi" - cpu: "10m" - limits: - memory: "128Mi" - cpu: "500m" -``` - -### Workload Identity binding - -```bash -# Create GCP service account -gcloud iam service-accounts create my-app-gsa \ - --project=MY_PROJECT - -# Grant Cloud SQL Client role -gcloud projects add-iam-policy-binding MY_PROJECT \ - --member="serviceAccount:my-app-gsa@MY_PROJECT.iam.gserviceaccount.com" \ - --role="roles/cloudsql.client" - -# Bind KSA → GSA -gcloud iam service-accounts add-iam-policy-binding my-app-gsa@MY_PROJECT.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:MY_PROJECT.svc.id.goog[my-namespace/my-app-ksa]" - -# Annotate the Kubernetes service account -kubectl annotate serviceaccount my-app-ksa \ - --namespace=my-namespace \ - iam.gke.io/gcp-service-account=my-app-gsa@MY_PROJECT.iam.gserviceaccount.com -``` - ---- - -## Cloud Run - -### Auto-managed Unix socket (recommended) - -Cloud Run natively manages the Auth Proxy — no sidecar needed. Add the instance connection name at deploy time and connect via Unix socket. 
- -```bash -gcloud run deploy my-service \ - --image=gcr.io/my-project/my-app:latest \ - --region=us-central1 \ - --add-cloudsql-instances=MY_PROJECT:us-central1:my-postgres \ - --set-env-vars="DB_INSTANCE=MY_PROJECT:us-central1:my-postgres" -``` - -Application connection string (Unix socket): - -```python -# PostgreSQL via psycopg -import os -INSTANCE = os.environ["DB_INSTANCE"] -DSN = f"postgresql://user:pass@/mydb?host=/cloudsql/{INSTANCE}" - -# MySQL via PyMySQL -DSN = f"mysql+pymysql://user:pass@/mydb?unix_socket=/cloudsql/{INSTANCE}" -``` - -### Direct VPC connection (alternative) - -For lower latency, connect via private IP over a Serverless VPC Access connector or Direct VPC Egress: - -```bash -gcloud run deploy my-service \ - --image=gcr.io/my-project/my-app:latest \ - --region=us-central1 \ - --network=my-vpc \ - --subnet=my-subnet \ - --vpc-egress=private-ranges-only \ - --set-env-vars="DB_HOST=10.0.0.5,DB_PORT=5432" -``` - ---- - -## Compute Engine - -### Auth Proxy binary (recommended) - -```bash -# Download the proxy -curl -o cloud-sql-proxy \ - https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.14.1/cloud-sql-proxy.linux.amd64 -chmod +x cloud-sql-proxy - -# Run as a systemd service (see below) or in the foreground -./cloud-sql-proxy --port=5432 MY_PROJECT:us-central1:my-postgres -``` - -Systemd unit file at `/etc/systemd/system/cloud-sql-proxy.service`: - -```ini -[Unit] -Description=Cloud SQL Auth Proxy -After=network.target - -[Service] -Type=simple -User=nobody -ExecStart=/usr/local/bin/cloud-sql-proxy \ - --structured-logs \ - --port=5432 \ - MY_PROJECT:us-central1:my-postgres -Restart=always - -[Install] -WantedBy=multi-user.target -``` - -```bash -systemctl enable --now cloud-sql-proxy -``` - -### Private IP via VPC (lowest latency) - -If the VM is in the same VPC as the Cloud SQL instance, connect directly using the private IP — no proxy needed: - -```bash -psql "host=10.0.0.5 port=5432 dbname=mydb user=postgres 
sslmode=require" -``` - -Grant the VM's service account `roles/cloudsql.client` to manage SSL certificates automatically. - ---- - -## Local Development - -```bash -# Install the proxy -curl -o cloud-sql-proxy \ - https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.14.1/cloud-sql-proxy.linux.amd64 -chmod +x cloud-sql-proxy - -# Authenticate with application default credentials -gcloud auth application-default login - -# Start proxy (runs in foreground; use & or a separate terminal) -./cloud-sql-proxy MY_PROJECT:us-central1:my-postgres --port=5432 - -# Connect -psql "host=127.0.0.1 port=5432 dbname=mydb user=myuser" -``` - -For multiple instances on different ports: - -```bash -./cloud-sql-proxy \ - "MY_PROJECT:us-central1:postgres-instance?port=5432" \ - "MY_PROJECT:us-central1:mysql-instance?port=3306" -``` - ---- - -## PSC (Private Service Connect) - -PSC allows consuming Cloud SQL from another project or organization without VPC peering. - -```bash -# 1. Enable PSC on the Cloud SQL instance -gcloud sql instances patch my-postgres \ - --enable-google-private-path \ - --no-assign-ip - -# 2. Get the PSC service attachment URI (shown in instance describe) -gcloud sql instances describe my-postgres \ - --format="value(settings.ipConfiguration.pscConfig.serviceAttachmentLink)" -# Output: projects/SERVICE_PROJECT/.../serviceAttachments/... - -# 3. Create PSC endpoint in the consumer project -gcloud compute addresses create psc-cloud-sql-ip \ - --region=us-central1 \ - --subnet=my-subnet - -gcloud compute forwarding-rules create psc-cloud-sql \ - --region=us-central1 \ - --network=my-vpc \ - --address=psc-cloud-sql-ip \ - --target-service-attachment=projects/SERVICE_PROJECT/.../serviceAttachments/... - -# 4. Configure DNS (Cloud DNS private zone) -gcloud dns managed-zones create cloud-sql-zone \ - --dns-name="sql.goog." \ - --visibility=private \ - --networks=my-vpc - -gcloud dns record-sets create my-postgres.sql.goog. 
\ - --zone=cloud-sql-zone \ - --type=A \ - --ttl=300 \ - --rrdatas=PSC_ENDPOINT_IP -``` - ---- - -## Connection String Formats - -| Engine | Format | -|---|---| -| PostgreSQL (via proxy) | `postgresql://user:pass@127.0.0.1:5432/dbname` | -| PostgreSQL (private IP) | `postgresql://user:pass@10.0.0.5:5432/dbname?sslmode=require` | -| PostgreSQL (Cloud Run socket) | `postgresql://user:pass@/dbname?host=/cloudsql/PROJECT:REGION:INSTANCE` | -| MySQL (via proxy) | `mysql://user:pass@127.0.0.1:3306/dbname` | -| MySQL (Cloud Run socket) | `mysql://user:pass@/dbname?unix_socket=/cloudsql/PROJECT:REGION:INSTANCE` | -| SQL Server (via proxy) | `mssql://user:pass@127.0.0.1:1433/dbname` | - -SQLAlchemy connection strings: - -```python -# PostgreSQL + asyncpg -"postgresql+asyncpg://user:pass@127.0.0.1:5432/dbname" - -# PostgreSQL + psycopg3 -"postgresql+psycopg://user:pass@127.0.0.1:5432/dbname" - -# MySQL + aiomysql -"mysql+aiomysql://user:pass@127.0.0.1:3306/dbname" - -# SQL Server + aioodbc -"mssql+aioodbc://user:pass@127.0.0.1:1433/dbname?driver=ODBC+Driver+18+for+SQL+Server" -``` - ---- - -## Connection Pooling - -### PgBouncer for PostgreSQL - -Cloud SQL does not include a built-in connection pooler for PostgreSQL. For high-concurrency workloads, run PgBouncer in front of Cloud SQL. - -```ini -# /etc/pgbouncer/pgbouncer.ini -[databases] -mydb = host=127.0.0.1 port=5432 dbname=mydb - -[pgbouncer] -listen_port = 6432 -listen_addr = 0.0.0.0 -auth_type = md5 -auth_file = /etc/pgbouncer/userlist.txt -pool_mode = transaction ; use session mode for LISTEN/NOTIFY -max_client_conn = 1000 -default_pool_size = 25 -min_pool_size = 5 -reserve_pool_size = 5 -server_reset_query = DISCARD ALL -``` - -Recommended: run PgBouncer as a Cloud Run service or GKE deployment co-located with the Auth Proxy. 
- -### ProxySQL for MySQL - -```ini -# /etc/proxysql.cnf (minimal) -datadir="/var/lib/proxysql" - -mysql_servers = -( - { address="127.0.0.1", port=3306, hostgroup=0 } -) - -mysql_users = -( - { username="app", password="pass", default_hostgroup=0 } -) - -mysql_variables = -{ - threads=4 - max_connections=2048 - default_query_timeout=36000000 - interfaces="0.0.0.0:6033" -} -``` diff --git a/plugins/flow/skills/cloud-sql/references/engines.md b/plugins/flow/skills/cloud-sql/references/engines.md deleted file mode 100644 index 3e45d85..0000000 --- a/plugins/flow/skills/cloud-sql/references/engines.md +++ /dev/null @@ -1,361 +0,0 @@ -# Cloud SQL Engine-Specific Tuning - -## PostgreSQL - -### Supported Versions - -`POSTGRES_9_6`, `POSTGRES_10`, `POSTGRES_11`, `POSTGRES_12`, `POSTGRES_13`, `POSTGRES_14`, `POSTGRES_15`, `POSTGRES_16` - -Recommended: `POSTGRES_15` or `POSTGRES_16` for new instances. - -### Key Database Flags - -| Flag | Recommended Value | Notes | -|---|---|---| -| `max_connections` | 100–400 | Cloud SQL enforces a hard cap based on tier; lower = less memory overhead per connection | -| `shared_buffers` | 25% of instance RAM (auto-set) | Cloud SQL sets this automatically; override only if needed | -| `work_mem` | 4–64 MB | Per sort/hash operation per query; multiply by `max_connections * 2` for worst-case RAM | -| `effective_cache_size` | 75% of instance RAM | Planner hint only — set high for better query plans | -| `maintenance_work_mem` | 256 MB | Used for VACUUM, CREATE INDEX; safe to increase temporarily | -| `wal_buffers` | 16 MB | Increase for write-heavy workloads | -| `checkpoint_completion_target` | 0.9 | Spreads checkpoint writes; reduces I/O spikes | -| `log_min_duration_statement` | 1000 | Log queries slower than 1 s (milliseconds) | -| `cloudsql.iam_authentication` | on | Enable IAM database authentication | -| `cloudsql.enable_pgaudit` | on | Enable pgAudit extension for audit logging | - -```bash -# Apply flags at instance creation -gcloud 
sql instances create my-postgres \ - --database-version=POSTGRES_15 \ - --database-flags=\ -max_connections=200,\ -work_mem=16MB,\ -effective_cache_size=6GB,\ -log_min_duration_statement=1000,\ -cloudsql.iam_authentication=on - -# Update flags on existing instance (brief restart required) -gcloud sql instances patch my-postgres \ - --database-flags=max_connections=300,work_mem=32MB -``` - -### Useful Extensions - -```sql --- pgvector: vector similarity search (AI/ML workloads) -CREATE EXTENSION IF NOT EXISTS vector; - --- PostGIS: geospatial data -CREATE EXTENSION IF NOT EXISTS postgis; -CREATE EXTENSION IF NOT EXISTS postgis_topology; - --- pg_stat_statements: query performance tracking -CREATE EXTENSION IF NOT EXISTS pg_stat_statements; --- Then query: -SELECT query, calls, mean_exec_time, total_exec_time -FROM pg_stat_statements -ORDER BY total_exec_time DESC -LIMIT 20; - --- pgAudit: audit logging (requires flag cloudsql.enable_pgaudit=on) -CREATE EXTENSION IF NOT EXISTS pgaudit; -ALTER SYSTEM SET pgaudit.log = 'ddl, write'; - --- uuid-ossp: UUID generation -CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; - --- pg_trgm: trigram similarity for fuzzy text search -CREATE EXTENSION IF NOT EXISTS pg_trgm; -CREATE INDEX idx_trgm ON mytable USING gin(col gin_trgm_ops); -``` - -### pgvector Setup - -```sql -CREATE EXTENSION IF NOT EXISTS vector; - --- Create a table with an embedding column -CREATE TABLE documents ( - id SERIAL PRIMARY KEY, - content TEXT NOT NULL, - embedding vector(1536) -- dimensions depend on the model -); - --- IVFFlat index (fast approximate search, good default) -CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) - WITH (lists = 100); - --- HNSW index (higher recall, more memory) -CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops) - WITH (m = 16, ef_construction = 64); - --- Nearest-neighbor query -SELECT id, content, - 1 - (embedding <=> $1::vector) AS similarity -FROM documents -ORDER BY embedding <=> $1::vector 
-LIMIT 10; -``` - -### Version Upgrade Path - -Cloud SQL supports in-place major version upgrades (no data migration required): - -```bash -# Check current version -gcloud sql instances describe my-postgres --format="value(databaseVersion)" - -# Upgrade (brief downtime; back up first) -gcloud sql instances patch my-postgres \ - --database-version=POSTGRES_16 - -# Verify -gcloud sql instances describe my-postgres --format="value(databaseVersion)" -``` - -Supported upgrade paths: 9.6 → 14 → 15 → 16 (multi-step for older versions). - ---- - -## MySQL - -### Supported Versions - -`MYSQL_5_6`, `MYSQL_5_7`, `MYSQL_8_0`, `MYSQL_8_4` - -Recommended: `MYSQL_8_0` or `MYSQL_8_4` for new instances. - -### Key Database Flags - -| Flag | Recommended Value | Notes | -|---|---|---| -| `innodb_buffer_pool_size` | 70–80% of instance RAM | Most important MySQL tuning knob | -| `innodb_log_file_size` | 256 MB–1 GB | Larger = better write throughput, longer recovery | -| `innodb_flush_log_at_trx_commit` | 1 (default) | Set to 2 only for non-critical data (risks 1 s of data loss) | -| `innodb_read_io_threads` | 4–8 | Parallel read I/O threads | -| `innodb_write_io_threads` | 4–8 | Parallel write I/O threads | -| `thread_cache_size` | 16–32 | Reuse threads instead of creating new ones | -| `max_connections` | 500–1000 | Each connection uses ~1 MB RAM | -| `slow_query_log` | on | Enable slow query log | -| `long_query_time` | 1 | Log queries slower than 1 s | -| `binlog_expire_logs_seconds` | 604800 | Binary log retention (7 days) | - -```bash -# Create MySQL 8.0 instance with tuned flags -gcloud sql instances create my-mysql \ - --database-version=MYSQL_8_0 \ - --tier=db-n1-standard-4 \ - --region=us-central1 \ - --availability-type=REGIONAL \ - --database-flags=\ -innodb_buffer_pool_size=3221225472,\ -thread_cache_size=32,\ -slow_query_log=on,\ -long_query_time=1,\ -binlog_expire_logs_seconds=604800 -``` - -### InnoDB Optimization - -```sql --- Check buffer pool hit ratio (should be > 
99%) -SELECT (1 - (innodb_buffer_pool_reads / innodb_buffer_pool_read_requests)) * 100 - AS buffer_pool_hit_pct -FROM information_schema.INNODB_METRICS -WHERE name IN ('buffer_pool_reads', 'buffer_pool_read_requests'); - --- Check for table scans (missing indexes) -SHOW GLOBAL STATUS LIKE 'Handler_read%'; --- Handler_read_rnd_next should be low relative to Handler_read_next - --- Analyze slow queries -SELECT query_time, lock_time, rows_examined, sql_text -FROM mysql.slow_log -ORDER BY query_time DESC -LIMIT 20; -``` - -### MySQL 8.0 Notes - -- `query_cache` is removed in MySQL 8.0 — do not set `query_cache_size` -- `utf8mb4` is the default character set (replaces `utf8`) -- `ONLY_FULL_GROUP_BY` SQL mode is enabled by default -- Read replicas require `--enable-bin-log` on the primary - -### Version Upgrade Path - -```bash -# 5.7 → 8.0 (in-place) -gcloud sql instances patch my-mysql --database-version=MYSQL_8_0 - -# 8.0 → 8.4 (in-place) -gcloud sql instances patch my-mysql --database-version=MYSQL_8_4 -``` - -Test application compatibility before upgrading (especially removed features and changed defaults in 8.0). - ---- - -## SQL Server - -### Supported Versions and Editions - -| Version | Editions Available | -|---|---| -| `SQLSERVER_2019_EXPRESS` | Express (dev/test only, 10 GB limit) | -| `SQLSERVER_2019_WEB` | Web | -| `SQLSERVER_2019_STANDARD` | Standard | -| `SQLSERVER_2019_ENTERPRISE` | Enterprise | -| `SQLSERVER_2022_STANDARD` | Standard | -| `SQLSERVER_2022_ENTERPRISE` | Enterprise | - -Recommended: `SQLSERVER_2022_STANDARD` for most production workloads. 
- -### Key Database Flags - -| Flag | Recommended Value | Notes | -|---|---|---| -| `max degree of parallelism` | 0 (auto) or NUMA node vCPU count | Controls parallel query execution | -| `max server memory (MB)` | 80–90% of instance RAM | Leave headroom for OS and SSAS | -| `cost threshold for parallelism` | 50 | Raise from 5 to prevent trivial parallel plans | -| `optimize for ad hoc workloads` | 1 | Reduces plan cache bloat from single-use queries | -| `remote access` | 0 | Disable unless cross-server queries are needed | - -```bash -# Create SQL Server 2022 Standard instance -gcloud sql instances create my-sqlserver \ - --database-version=SQLSERVER_2022_STANDARD \ - --tier=db-n1-standard-4 \ - --region=us-central1 \ - --availability-type=REGIONAL \ - --root-password="$(gcloud secrets versions access latest --secret=sqlserver-sa-password)" -``` - -### SQL Server Configuration - -```sql --- Set max server memory (run in master db) -EXEC sys.sp_configure 'show advanced options', 1; -RECONFIGURE; - -EXEC sys.sp_configure 'max server memory (MB)', 12288; -- 12 GB on 16 GB instance -RECONFIGURE; - --- Set MAXDOP -EXEC sys.sp_configure 'max degree of parallelism', 4; -RECONFIGURE; - --- Raise cost threshold for parallelism -EXEC sys.sp_configure 'cost threshold for parallelism', 50; -RECONFIGURE; - --- Enable optimize for ad hoc workloads -EXEC sys.sp_configure 'optimize for ad hoc workloads', 1; -RECONFIGURE; -``` - -### tempdb Optimization - -Cloud SQL for SQL Server places tempdb on SSD. 
For write-heavy workloads, increase the number of tempdb data files to match vCPU count (up to 8): - -```sql --- Check current tempdb file count -SELECT name, physical_name, size * 8 / 1024 AS size_mb -FROM sys.master_files -WHERE database_id = 2; - --- Add tempdb data files (match vCPU count, max 8) -ALTER DATABASE tempdb - ADD FILE (NAME = 'tempdev2', - FILENAME = 'D:\t\tempdb2.ndf', - SIZE = 256MB, - FILEGROWTH = 64MB); -``` - -Note: On Cloud SQL, data file paths are managed automatically. Use ALTER DATABASE for file count changes. - -### Always-On Availability Groups - -Cloud SQL Enterprise and Enterprise Plus editions support Always-On availability groups for automatic failover. These are managed automatically by Cloud SQL's HA configuration — use `--availability-type=REGIONAL` to enable. - -### Version Upgrade Path - -```bash -# 2019 → 2022 (in-place upgrade) -gcloud sql instances patch my-sqlserver \ - --database-version=SQLSERVER_2022_STANDARD -``` - -Test stored procedures, linked servers, and compatibility levels before upgrading production. - ---- - -## Migration Paths - -### Cloud SQL PostgreSQL → AlloyDB - -For workloads that have outgrown Cloud SQL performance limits, migrate to AlloyDB using Database Migration Service (DMS): - -```bash -# Enable DMS -gcloud services enable datamigration.googleapis.com - -# Create a migration job (continuous replication) -gcloud database-migration migration-jobs create pg-to-alloydb \ - --region=us-central1 \ - --type=CONTINUOUS \ - --source=my-source-profile \ - --destination=my-alloydb-profile - -# Start migration -gcloud database-migration migration-jobs start pg-to-alloydb \ - --region=us-central1 - -# Promote (switch traffic to AlloyDB, then cut over) -gcloud database-migration migration-jobs promote pg-to-alloydb \ - --region=us-central1 -``` - -See `flow:alloydb` for AlloyDB instance configuration. 
- -### On-Premises → Cloud SQL via DMS - -```bash -# Create connection profile for on-premises source -gcloud database-migration connection-profiles create my-source \ - --region=us-central1 \ - --type=POSTGRESQL \ - --host=ON_PREM_IP \ - --port=5432 \ - --username=migration_user \ - --password=PASS - -# Create connection profile for Cloud SQL destination -gcloud database-migration connection-profiles create my-dest \ - --region=us-central1 \ - --type=CLOUDSQL \ - --cloudsql-instance-id=my-postgres - -# Create migration job -gcloud database-migration migration-jobs create onprem-to-cloud \ - --region=us-central1 \ - --type=CONTINUOUS \ - --source=my-source \ - --destination=my-dest \ - --dump-path=gs://my-bucket/dms-dumps -``` - -### MySQL → PostgreSQL via pgloader - -For cross-engine migrations, use `pgloader` to convert schema and data: - -```bash -# Install pgloader -apt-get install -y pgloader - -# Run migration -pgloader mysql://user:pass@127.0.0.1/source_db \ - postgresql://user:pass@127.0.0.1/dest_db -``` - -Review schema differences (data types, auto-increment vs sequences, case sensitivity) before migrating. diff --git a/plugins/flow/skills/cpp/SKILL.md b/plugins/flow/skills/cpp/SKILL.md deleted file mode 100644 index b60f20a..0000000 --- a/plugins/flow/skills/cpp/SKILL.md +++ /dev/null @@ -1,250 +0,0 @@ ---- -name: cpp -description: "Use when editing C++ files, .cpp, .hpp, .cc, .hh, .cxx, CMakeLists.txt, modern C++ APIs, resource ownership, error handling, concurrency, build systems, or native extension code." ---- - -# C++ Development - -## Overview - -Use this skill for modern C++ extension and backend work: safe, maintainable design choices plus a reliable build-and-release pipeline. Covers resource ownership, API boundaries, error handling, concurrency, local builds, git workflow, and CI/CD. 
- -## Quick Reference - -### Key Design Principles - -| Principle | Rule | -|---|---| -| Resource management | RAII for all resource lifetimes; no raw `new`/`delete` | -| Ownership | `std::unique_ptr` for exclusive, `std::shared_ptr` only when truly shared | -| Error handling | Explicit policy per module (exceptions or error codes); never mix ad hoc | -| Immutability | `const` by default on variables, parameters, and methods | -| API boundaries | Small, stable headers; hide implementation in `.cpp` files | -| Concurrency | Message passing or clear lock ownership; document thread-safety per type | -| Performance | Measure first; avoid allocations in hot loops; keep data cache-friendly | - -### CMake Setup Pattern - -```cmake -cmake_minimum_required(VERSION 3.20) -project(mylib VERSION 1.0.0 LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -# Library target -add_library(mylib src/mylib.cpp) -target_include_directories(mylib PUBLIC include) - -# Tests -option(BUILD_TESTS "Build tests" ON) -if(BUILD_TESTS) - enable_testing() - add_subdirectory(tests) -endif() -``` - -### Build Commands - -| Action | Command | -|---|---| -| Configure (debug) | `cmake -B build -DCMAKE_BUILD_TYPE=Debug` | -| Configure (release) | `cmake -B build -DCMAKE_BUILD_TYPE=Release` | -| Build | `cmake --build build -j$(nproc)` | -| Test | `ctest --test-dir build --output-on-failure` | -| Tidy check | `clang-tidy src/*.cpp -- -I include` | -| Sanitizer build | `cmake -B build -DCMAKE_CXX_FLAGS="-fsanitize=address,undefined"` | - - - -## Workflow - -### Step 1: Set Up the Build System - -Create `CMakeLists.txt` with C++20 standard, `CMAKE_EXPORT_COMPILE_COMMANDS ON` (for tooling), and separate library/executable/test targets. - -### Step 2: Design the API - -Define public headers in `include/`. Keep headers minimal — forward-declare where possible, use the Pimpl idiom for implementation hiding. 
Document thread-safety guarantees on public types. - -### Step 3: Implement with RAII - -Use smart pointers for heap allocations, RAII wrappers for file handles / sockets / locks. Never use raw `new`/`delete`. Prefer value types and references over pointers. - -### Step 4: Write Tests - -Use a testing framework (GoogleTest, Catch2). Write tests alongside implementation. Focus on behavior and edge cases, not line coverage. - -### Step 5: Configure CI - -Build matrix across supported OS/arch. Run clang-tidy and sanitizers (ASan, UBSan) in CI. Separate fast unit tests from slower integration tests. Cache dependencies/toolchains. - - - - - -## Guardrails - -- **No raw `new`/`delete`** — use `std::make_unique` / `std::make_shared`; if you need custom allocation, wrap it in an RAII type -- **Prefer `std::` algorithms** over hand-written loops — `std::ranges::find`, `std::transform`, `std::accumulate` are safer and often faster -- **Use sanitizers in CI** — always run AddressSanitizer and UndefinedBehaviorSanitizer; add ThreadSanitizer for concurrent code -- **Do not throw across C ABI boundaries** — catch exceptions at the boundary and convert to error codes -- **Avoid global mutable state** — it creates hidden dependencies and makes testing and concurrency harder -- **Keep critical sections short** — hold locks for the minimum duration; prefer lock-free designs when measured to be necessary -- **Validate inputs early** — check preconditions at API boundaries and return actionable diagnostics - - - - - -### Validation Checkpoint - -Before delivering code, verify: - -- [ ] No raw `new`/`delete` — all allocations use smart pointers or RAII wrappers -- [ ] `CMakeLists.txt` sets C++ standard, exports compile commands, and has test targets -- [ ] Public headers are minimal (forward declarations, no implementation details) -- [ ] Thread-safety guarantees are documented on public types -- [ ] CI configuration includes sanitizers (ASan + UBSan at minimum) -- [ ] Error handling 
policy is consistent within the module - - - - - -## Example - -CMakeLists.txt and a class demonstrating RAII: - -```cmake -# CMakeLists.txt -cmake_minimum_required(VERSION 3.20) -project(sensor_reader VERSION 0.1.0 LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -add_library(sensor_reader src/sensor_reader.cpp) -target_include_directories(sensor_reader PUBLIC include) -``` - -```cpp -// include/sensor_reader/sensor_reader.hpp -#pragma once - -#include -#include -#include -#include -#include - -/// Thread-safety: NOT thread-safe. Each instance must be used from one thread. -class SensorReader { -public: - /// Opens a connection to the sensor at the given device path. - /// Throws std::runtime_error if the device cannot be opened. - explicit SensorReader(std::string_view device_path); - - /// RAII: closes the connection on destruction. - ~SensorReader(); - - // Non-copyable, moveable - SensorReader(const SensorReader&) = delete; - SensorReader& operator=(const SensorReader&) = delete; - SensorReader(SensorReader&&) noexcept; - SensorReader& operator=(SensorReader&&) noexcept; - - /// Read up to `buffer.size()` bytes. Returns the number of bytes read. - [[nodiscard]] std::size_t read(std::span buffer) const; - - /// Device path this reader is connected to. 
- [[nodiscard]] std::string_view device_path() const noexcept; - -private: - struct Impl; - std::unique_ptr impl_; -}; -``` - -```cpp -// src/sensor_reader.cpp -#include "sensor_reader/sensor_reader.hpp" - -#include -#include - -#include -#include - -struct SensorReader::Impl { - std::string device_path; - int fd = -1; - - ~Impl() { - if (fd >= 0) { - ::close(fd); - } - } -}; - -SensorReader::SensorReader(std::string_view device_path) - : impl_(std::make_unique()) { - impl_->device_path = std::string(device_path); - impl_->fd = ::open(impl_->device_path.c_str(), O_RDONLY); - if (impl_->fd < 0) { - throw std::runtime_error("Failed to open device: " + impl_->device_path); - } -} - -SensorReader::~SensorReader() = default; -SensorReader::SensorReader(SensorReader&&) noexcept = default; -SensorReader& SensorReader::operator=(SensorReader&&) noexcept = default; - -std::size_t SensorReader::read(std::span buffer) const { - const auto n = ::read(impl_->fd, buffer.data(), buffer.size()); - if (n < 0) { - throw std::runtime_error("Read failed on device: " + impl_->device_path); - } - return static_cast(n); -} - -std::string_view SensorReader::device_path() const noexcept { - return impl_->device_path; -} -``` - - - ---- - -## References Index - -For detailed guides, refer to the following documents in `references/`: - -- **[Design Best Practices](references/design.md)** - - Modern C++ design and implementation: resource ownership (RAII), API conventions, error handling, performance hygiene, and concurrency. -- **[Build & CI Workflow](references/ci_workflow.md)** - - Local developer workflow, git branching strategy, CI pipeline design, release and compatibility flow. - ---- - -## Official References - -1. C++ Core Guidelines: -2. C++ reference (language/library): -3. CMake docs: -4. Clang-Tidy checks: -5. GitHub Actions docs: -6. GitHub Actions security hardening: -7. Conventional Commits: -8. 
SemVer: - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/cpp/agents/openai.yaml b/plugins/flow/skills/cpp/agents/openai.yaml deleted file mode 100644 index c60fd97..0000000 --- a/plugins/flow/skills/cpp/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "C++ Development" - short_description: "Modern C++ design, build systems, ownership, APIs, concurrency, and CI" diff --git a/plugins/flow/skills/cpp/references/ci_workflow.md b/plugins/flow/skills/cpp/references/ci_workflow.md deleted file mode 100644 index e094aa4..0000000 --- a/plugins/flow/skills/cpp/references/ci_workflow.md +++ /dev/null @@ -1,43 +0,0 @@ -# C++ Build, Git, and CI Workflow - -## Overview - -Use this reference to keep C++ project delivery predictable: clean local build/test commands, disciplined git flow, and CI pipelines that match release requirements. - -## Local developer workflow - -1. Keep canonical make/cmake targets documented (`release`, `debug`, `test`, `integration`, `tidy-check`). -2. Make environment setup explicit (`configure_ci`-style target for dependencies/toolchains). -3. Use fast local checks before pushing. -4. Keep integration tests reproducible via containers. - -## Git workflow - -1. Branch from `main` with short-lived feature/fix branches. -2. Keep commits atomic and scoped (code, tests, docs). -3. Require green CI before merge. -4. Use semantic version tags for release automation (`vX.Y.Z`, optional prerelease suffix). -5. Keep changelog/release notes synced to tagged artifacts. - -## CI pipeline design - -1. Build matrix should match supported OS/arch/release targets. -2. Separate smoke/unit checks from heavier integration jobs. -3. 
Cache dependencies/toolchains, but keep cache invalidation clear. -4. Use least-privilege tokens; only elevate when workflow updates require it. -5. Publish artifacts with deterministic naming that encodes extension and DuckDB version. - -## Release and compatibility flow - -1. Trigger release pipeline from tags. -2. Verify artifacts on release page and smoke-load in target runtime. -3. Track compatibility matrix against upstream DuckDB versions. -4. Keep upgrade automation (e.g., DuckDB version update workflows) documented. - -## Review checklist - -1. Can a new contributor build and run tests from docs alone? -2. Do CI jobs mirror local commands? -3. Are release steps repeatable and scriptable? -4. Are platform-specific dependencies explicit? -5. Are secrets/tokens minimally scoped? diff --git a/plugins/flow/skills/cpp/references/design.md b/plugins/flow/skills/cpp/references/design.md deleted file mode 100644 index 3d8be2f..0000000 --- a/plugins/flow/skills/cpp/references/design.md +++ /dev/null @@ -1,50 +0,0 @@ -# C++ Design Best Practices - -## Overview - -Use this reference to keep C++ code practical and safe: explicit ownership, strong API boundaries, predictable error handling, and measurable performance. - -## Core rules - -1. Prefer RAII for all resource lifetimes. -2. Make ownership obvious at type boundaries (`unique_ptr`, references, value types). -3. Keep interfaces small and stable. -4. Use `const` and immutability by default. -5. Avoid hidden global state. - -## Design guidance - -1. Separate pure logic from I/O and side effects. -2. Make invalid states unrepresentable with types where possible. -3. Favor composition over deep inheritance. -4. Keep headers minimal to reduce coupling and rebuild cost. -5. Hide platform-specific code behind narrow adapters. - -## Error and API conventions - -1. Prefer explicit status/exception policy per module (do not mix ad hoc). -2. Do not throw across C ABI boundaries. -3. 
Validate inputs early and return actionable diagnostics. -4. Keep error messages stable enough for CI/debugging. - -## Performance hygiene - -1. Measure first; optimize real hotspots. -2. Avoid accidental allocations in hot loops. -3. Keep data layouts cache-friendly. -4. Re-check branchy code with representative workloads before and after changes. - -## Concurrency hygiene - -1. Prefer message passing or clear lock ownership over ad hoc shared mutable state. -2. Keep critical sections short. -3. Document thread-safety guarantees for each public type. -4. Use sanitizers and thread tooling in CI where feasible. - -## Code review checklist - -1. Are ownership and lifetimes explicit? -2. Are ABI/error boundaries safe? -3. Are tests focused on behavior and regressions? -4. Are performance claims backed by data? -5. Is the design simple enough for future contributors? diff --git a/plugins/flow/skills/dishka/SKILL.md b/plugins/flow/skills/dishka/SKILL.md deleted file mode 100644 index 2a19d24..0000000 --- a/plugins/flow/skills/dishka/SKILL.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -name: dishka -description: "Use when editing Dishka dependency injection code, Provider, Scope, Container, FromDishka, Inject, DI scopes, providers, testing containers, or Litestar/FastAPI Dishka integrations." ---- - -# Dishka Dependency Injection Skill - -## Overview - -Dishka is a Python dependency injection framework built around Providers, Scopes, and typed containers. It supports async/sync workflows and integrates with web frameworks (Litestar, FastAPI) and CLI tools (Click). - ---- - - - -## References Index - -For detailed guides and configuration examples, refer to the following documents in `references/`: - -- **[Providers, Scopes & Factory Functions](references/providers.md)** - - Core concepts, scope hierarchy, container creation, provider patterns, clean naming, and best practices. 
-- **[Litestar Integration](references/litestar.md)** - - Setup, controller injection, router integration, and manual resolution from connection. -- **[FastAPI Integration](references/fastapi.md)** - - Setup and route-level injection with FromDishka. -- **[CLI Integration](references/cli.md)** - - Click with async_inject decorator for Dishka-powered CLI commands. -- **[Testing Patterns](references/testing.md)** - - Test containers, mock providers, and override strategies. - - - - - -## Example: Provider and Container Setup - -```python -from dishka import Provider, Scope, make_async_container, provide - -class AppProvider(Provider): - scope = Scope.APP - - @provide - async def get_db_engine(self) -> AsyncEngine: - return create_async_engine("postgresql+asyncpg://...") - -class RequestProvider(Provider): - scope = Scope.REQUEST - - @provide - async def get_session(self, engine: AsyncEngine) -> AsyncSession: - return AsyncSession(engine) - -container = make_async_container(AppProvider(), RequestProvider()) -``` - - - ---- - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Dishka](https://github.com/cofin/flow/blob/main/templates/styleguides/frameworks/dishka.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -- **Explicitly manage Scopes (APP, REQUEST)** -- Always use the appropriate scope to avoid resource leaks or unnecessary object creation. Objects in `Scope.APP` live as long as the container; `Scope.REQUEST` lives only for the duration of a request. 
-- **Avoid global container access** -- Always use dependency injection to provide dependencies; never resolve objects from a global container instance in application logic. -- **Ensure Providers are stateless** -- Providers should only contain factory methods; any state should be managed within the injected objects themselves. -- **Check scope hierarchy** -- Objects in a wider scope (APP) cannot depend on objects in a narrower scope (REQUEST). -- **Use typed providers** -- Always use type hints for provider return values to ensure the container can correctly resolve and validate dependencies. - - - -## Validation Checkpoint - -- [ ] Providers are assigned the correct `Scope` (APP, REQUEST) -- [ ] No objects are resolved manually from a global container -- [ ] All factory methods in providers are correctly annotated with `@provide` -- [ ] Scope hierarchy is valid (no narrow-to-wide scope dependencies) -- [ ] Provider return types match the types expected by the consumers -- [ ] Async/sync providers are used consistently with the target framework - diff --git a/plugins/flow/skills/dishka/agents/openai.yaml b/plugins/flow/skills/dishka/agents/openai.yaml deleted file mode 100644 index 5a2e93a..0000000 --- a/plugins/flow/skills/dishka/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Dishka" - short_description: "Dishka dependency injection providers, scopes, containers, and integrations" diff --git a/plugins/flow/skills/dishka/references/cli.md b/plugins/flow/skills/dishka/references/cli.md deleted file mode 100644 index 2c3eea4..0000000 --- a/plugins/flow/skills/dishka/references/cli.md +++ /dev/null @@ -1,51 +0,0 @@ -# CLI Integration - -## Click with Async Injection - -```python -import functools -from collections.abc import Callable, Coroutine -from typing import Any, ParamSpec, TypeVar - -import anyio -from dishka import AsyncContainer - -P = ParamSpec("P") -R = TypeVar("R") - -def async_inject( - func: Callable[P, Coroutine[Any, Any, 
R]], -) -> Callable[..., R]: - """Decorator for Click commands with Dishka injection.""" - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> R: - async def run() -> R: - container = make_cli_container() - async with container() as request_container: - # Resolve dependencies from type hints - resolved = {} - for name, hint in func.__annotations__.items(): - if name == "return": - continue - if name not in kwargs or kwargs[name] is None: - try: - resolved[name] = await request_container.get(hint) - except Exception: - pass # Not a DI type, skip - return await func(*args, **{**kwargs, **resolved}) - return anyio.from_thread.run(run) - - return wrapper - -# Usage -@click.command() -@click.option("--email", "-e", required=True) -@async_inject -async def create_user( - user_service: UserService, # Injected by Dishka - email: str, # From Click option -) -> None: - user = await user_service.create(email=email) - print(f"Created: {user.id}") -``` diff --git a/plugins/flow/skills/dishka/references/fastapi.md b/plugins/flow/skills/dishka/references/fastapi.md deleted file mode 100644 index 7ff4053..0000000 --- a/plugins/flow/skills/dishka/references/fastapi.md +++ /dev/null @@ -1,14 +0,0 @@ -# FastAPI Integration - -```python -from dishka.integrations.fastapi import FromDishka, setup_dishka -from fastapi import FastAPI - -app = FastAPI() -container = make_async_container(ServiceProvider()) -setup_dishka(container, app) - -@app.get("/users") -async def list_users(service: FromDishka[UserService]) -> list[User]: - return await service.list_all() -``` diff --git a/plugins/flow/skills/dishka/references/litestar.md b/plugins/flow/skills/dishka/references/litestar.md deleted file mode 100644 index 2cf4a8f..0000000 --- a/plugins/flow/skills/dishka/references/litestar.md +++ /dev/null @@ -1,77 +0,0 @@ -# Litestar Integration - -> [!WARNING] -> **Discourage Litestar Default Displacement:** -> Use Dishka primarily for domain services, application configuration, and 
database adapters. -> Do **NOT** try to configure Dishka to provide Litestar native primitives like `Request`, `Response`, `State`, or `WebSocket`. -> Displacing Litestar's highly optimized native dependency injection for these objects can lead to request scoping issues and unnecessary overhead. - -## Setup - -```python -from dishka.integrations.litestar import setup_dishka, LitestarProvider -from litestar import Litestar - -container = make_async_container( - LitestarProvider(), # Provides Request, State, etc. - PersistenceProvider(), - DomainServiceProvider(), -) - -app = Litestar(route_handlers=[...]) -setup_dishka(container, app) -``` - -## Controller Injection - -```python -from dishka.integrations.litestar import FromDishka as Inject - -class UserController(Controller): - path = "/api/users" - - @get(operation_id="ListUsers") - async def list_users( - self, - service: Inject[UserService], # Injected by Dishka - ) -> list[User]: - return await service.list_all() - - @get("/{user_id:uuid}") - async def get_user( - self, - service: Inject[UserService], - user_id: UUID, # From path parameter - ) -> User: - return await service.get(user_id) -``` - -## LitestarRouter Integration - -When organizing controllers, rely on the standard Litestar `Router`. Dishka injections are automatically resolved down the router tree once `setup_dishka` is applied to the main `Litestar` app. No special Dishka wrappers are needed. 
- -```python -from litestar import Router - -router = Router( - path="/api", - route_handlers=[UserController, OrderController], -) -``` - -## Manual Resolution from Connection - -```python -async def get_from_connection( - connection: ASGIConnection, - dependency_type: type[T], -) -> T: - """Get dependency from Dishka container via connection.""" - container: AsyncContainer = connection.state.dishka_container - return await container.get(dependency_type) - -# Usage in middleware, guards, JWT callbacks -async def jwt_auth_callback(token: str, connection: ASGIConnection) -> User: - service = await get_from_connection(connection, UserService) - return await service.get_by_token(token) -``` diff --git a/plugins/flow/skills/dishka/references/providers.md b/plugins/flow/skills/dishka/references/providers.md deleted file mode 100644 index 6c11264..0000000 --- a/plugins/flow/skills/dishka/references/providers.md +++ /dev/null @@ -1,194 +0,0 @@ -# Providers, Scopes & Factory Functions - -## Core Concepts - -```python -from dishka import Provider, Scope, provide, make_async_container, AsyncContainer - -class MyProvider(Provider): - """Providers group related dependencies.""" - - @provide(scope=Scope.APP) - def provide_config(self) -> Config: - """App-scoped: created once, shared across all requests.""" - return Config.from_env() - - @provide(scope=Scope.REQUEST) - def provide_service(self, config: Config) -> MyService: - """Request-scoped: created per request, auto-injected deps.""" - return MyService(config) - - @provide(scope=Scope.REQUEST) - async def provide_async_resource(self) -> AsyncIterable[DBConnection]: - """ - [CRITICAL] TEARDOWN LOGIC MUST USE AsyncIterable: - Always yield the resource and perform cleanup after the yield. - Do not use standard returning methods if the resource requires teardown. 
- """ - conn = await create_connection() - try: - yield conn - finally: - await conn.close() -``` - -## Scopes - -| Scope | Lifetime | Use Case | -|-------|----------|----------| -| `Scope.APP` | Application lifetime | Config, connection pools, singletons | -| `Scope.REQUEST` | Single request | Services, database sessions, user context | -| `Scope.ACTION` | Sub-request operation | Nested transactions, batch operations | -| `Scope.STEP` | Single resolution | Factories, unique instances | - -## Container Creation - -```python -from dishka import make_async_container, make_container - -# Async container (for async frameworks) -container = make_async_container( - ConfigProvider(), - PersistenceProvider(), - DomainServiceProvider(), -) - -# Sync container -container = make_container(ConfigProvider(), ServiceProvider()) -``` - -## Clean Naming Pattern (Inject[T]) - -Create framework-agnostic aliases for cleaner code, specifically targeting `Inject[T]` to simplify controller signatures: - -```python -# di.py - Central DI module -from dishka import AsyncContainer, Container, Provider, Scope -from dishka import make_async_container, make_container, provide -from dishka.integrations.litestar import FromDishka as Inject -from dishka.integrations.litestar import LitestarProvider, setup_dishka - -__all__ = [ - "AsyncContainer", - "Container", - "Inject", # Clean alias for FromDishka - "Provider", - "Scope", - "make_async_container", - "make_container", - "provide", - "setup_dishka", -] -``` - -Usage with clean naming: - -```python -from myapp.di import Inject - -@get("/users") -async def list_users(service: Inject[UserService]) -> list[User]: - return await service.list_all() -``` - -## Persistence Provider - -```python -from collections.abc import AsyncIterable - -class PersistenceProvider(Provider): - """Database connection provider.""" - - @provide(scope=Scope.REQUEST) - async def provide_driver(self) -> AsyncIterable[AsyncDriverAdapterBase]: - """Provide database session 
with automatic cleanup.""" - async with db_manager.provide_session(db) as driver: - yield driver -``` - -## Domain Service Provider - -```python -class DomainServiceProvider(Provider): - """Business logic services provider.""" - - @provide(scope=Scope.REQUEST) - def provide_user_service( - self, - driver: AsyncDriverAdapterBase, # Auto-injected - ) -> UserService: - return UserService(driver) - - @provide(scope=Scope.REQUEST) - def provide_order_service( - self, - driver: AsyncDriverAdapterBase, - user_service: UserService, # Can depend on other services - ) -> OrderService: - return OrderService(driver, user_service) -``` - -## External Service Provider - -```python -class EmailServiceProvider(Provider): - """Third-party service integration.""" - - @provide(scope=Scope.REQUEST) - async def provide_email_service(self) -> AsyncIterable[EmailService]: - async with email_backend.provide_service() as service: - yield service - - @provide(scope=Scope.REQUEST) - def provide_notification_service( - self, - email: EmailService, - config: Config, - ) -> NotificationService: - return NotificationService(email, config) -``` - -## Factory Functions (Alternative to Methods) - -```python -from dishka import provide, Scope - -@provide(scope=Scope.REQUEST) -async def provide_cache() -> AsyncIterable[CacheClient]: - client = await CacheClient.connect() - yield client - await client.close() - -# Register in container -container = make_async_container( - provide_cache, # Functions work too - ServiceProvider(), -) -``` - -## Best Practices - -1. **Scope Selection**: - - Use `Scope.APP` sparingly (config, pools) - - Default to `Scope.REQUEST` for services - - Use `Scope.STEP` for factories - -2. **Provider Organization**: - - Group related dependencies in one Provider - - Separate infrastructure (DB, cache) from domain services - - Create framework-specific providers (Litestar, CLI) - -3. 
**Clean Naming**: - - Create `Inject` alias for `FromDishka` - - Centralize DI exports in single module - - Use type hints, not string references - -4. **Resource Management**: - - Use `AsyncIterable` for cleanup - - Yield resources, cleanup after yield - - Let Dishka manage lifecycle - -5. **Testability**: - - Design providers for easy replacement - - Create test-specific providers - - Avoid global state in providers diff --git a/plugins/flow/skills/dishka/references/testing.md b/plugins/flow/skills/dishka/references/testing.md deleted file mode 100644 index cff8123..0000000 --- a/plugins/flow/skills/dishka/references/testing.md +++ /dev/null @@ -1,46 +0,0 @@ -# Testing Patterns - -## Test Container - -```python -import pytest -from dishka import make_async_container, Provider, provide, Scope - -class TestProvider(Provider): - """Mock provider for tests.""" - - @provide(scope=Scope.REQUEST) - def provide_user_service(self) -> UserService: - return MockUserService() - -@pytest.fixture -async def container(): - container = make_async_container(TestProvider()) - yield container - await container.close() - -@pytest.fixture -async def user_service(container): - async with container() as request: - yield await request.get(UserService) -``` - -## Override in Tests - -```python -from dishka import Provider, provide, Scope - -class MockPersistenceProvider(Provider): - """Replace real DB with in-memory for tests.""" - - @provide(scope=Scope.REQUEST) - async def provide_driver(self) -> AsyncIterable[AsyncDriverAdapterBase]: - async with in_memory_db() as driver: - yield driver - -# Use mock provider in test container -test_container = make_async_container( - MockPersistenceProvider(), # Replaces real persistence - DomainServiceProvider(), # Real domain services -) -``` diff --git a/plugins/flow/skills/docker/SKILL.md b/plugins/flow/skills/docker/SKILL.md deleted file mode 100644 index bad999e..0000000 --- a/plugins/flow/skills/docker/SKILL.md +++ /dev/null @@ -1,298 +0,0 @@ 
---- -name: docker -description: "Use when editing Dockerfile, Containerfile-like Docker syntax, docker-compose.yml, docker-compose.yaml, .dockerignore, multi-stage builds, BuildKit cache mounts, Compose services, or image optimization." ---- - -# Docker - -## Overview - -Docker provides OS-level virtualization via containers. This skill covers Dockerfile best practices, multi-stage builds, distroless images, Compose orchestration, and BuildKit optimizations. - ---- - -## Multi-Stage Build Quick Reference - -Multi-stage builds separate build-time dependencies from the runtime image, producing minimal production images. - -```dockerfile -# ---- Stage 1: dependency builder ---- -FROM python:3.12-slim-bookworm AS builder -WORKDIR /app -RUN pip install --no-cache-dir uv -COPY pyproject.toml uv.lock ./ -# Cache uv's package download cache across builds -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen --no-dev --no-editable - -# ---- Stage 2: runtime (distroless, non-root) ---- -FROM gcr.io/distroless/python3-debian12:nonroot -WORKDIR /app -COPY --from=builder /app/.venv/lib/python3.12/site-packages /usr/lib/python3.12/site-packages -COPY src/ ./src/ -ENTRYPOINT ["python", "-m", "myapp"] -``` - -Key rules: - -- Name every stage (`AS builder`, `AS runner`, etc.). -- Only the final stage ends up in the shipped image. -- Copy only what is needed from earlier stages with `COPY --from=`. - ---- - -## Compose Quick Reference - -```yaml -# compose.yml -services: - app: - build: . 
- image: myapp:dev - ports: - - "8000:8000" - environment: - DATABASE_URL: postgresql://app:secret@db:5432/mydb - depends_on: - db: - condition: service_healthy - restart: unless-stopped - - db: - image: postgres:16-alpine - environment: - POSTGRES_USER: app - POSTGRES_PASSWORD: secret - POSTGRES_DB: mydb - volumes: - - pg_data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U app -d mydb"] - interval: 10s - timeout: 5s - retries: 5 - -volumes: - pg_data: -``` - -```bash -# Common Compose commands -docker compose up -d # start detached -docker compose logs -f app # follow service logs -docker compose exec app bash # shell into running container -docker compose down -v # stop and remove volumes -docker compose build --no-cache # full rebuild -``` - ---- - -## BuildKit Cache Mounts - -`--mount=type=cache` persists a directory between builds so package managers do not re-download. - -```dockerfile -# uv (Python) -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen --no-dev - -# pip -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements.txt - -# apt -RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ - --mount=type=cache,target=/var/lib/apt,sharing=locked \ - apt-get update && apt-get install -y --no-install-recommends curl - -# npm -RUN --mount=type=cache,target=/root/.npm \ - npm ci --omit=dev - -# Go modules -RUN --mount=type=cache,target=/go/pkg/mod \ - go mod download -``` - -Enable BuildKit (default in Docker 23+): - -```bash -export DOCKER_BUILDKIT=1 -docker build . -# or -docker buildx build . -``` - ---- - -## Production Patterns - -### uv Package Manager - -`uv` is a fast Python package/project manager. Use it as the build-stage installer, then copy the resulting `.venv` into the runtime stage. 
- -```dockerfile -FROM python:3.12-slim-bookworm AS builder -WORKDIR /app -RUN pip install --no-cache-dir uv -COPY pyproject.toml uv.lock ./ -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen --no-dev --no-editable -``` - -### Distroless Base Images - -| Image | Use case | -|-------|----------| -| `gcr.io/distroless/static-debian12:nonroot` | Statically compiled binaries (Go, Rust) | -| `gcr.io/distroless/base-debian12:nonroot` | Dynamically linked, needs glibc | -| `gcr.io/distroless/python3-debian12:nonroot` | Python applications | -| `gcr.io/distroless/nodejs22-debian12:nonroot` | Node.js applications | - -Always use the `:nonroot` tag — the image user is UID 65532. - -### tini Init - -`tini` properly forwards signals and reaps zombie processes. Use it when the base image does not include an init system (e.g., non-distroless slim images). - -```dockerfile -FROM python:3.12-slim-bookworm -RUN apt-get update \ - && apt-get install -y --no-install-recommends tini \ - && rm -rf /var/lib/apt/lists/* -ENTRYPOINT ["tini", "--"] -CMD ["python", "-m", "myapp"] -``` - -Distroless images already run as non-root; for non-distroless images add tini + explicit non-root user. - -### Non-Root User (UID 65532) - -UID 65532 is the `nonroot` user in distroless images. Align custom user IDs with this value for consistency. - -```dockerfile -# For non-distroless images -RUN groupadd --gid 65532 nonroot \ - && useradd --uid 65532 --gid 65532 --no-create-home --shell /bin/false nonroot -USER nonroot -``` - -### .dockerignore - -```text -.git -.github -.venv -__pycache__ -*.pyc -*.pyo -node_modules -.env -.env.* -Dockerfile -docker-compose*.yml -compose*.yml -.dockerignore -coverage -.pytest_cache -.mypy_cache -.ruff_cache -dist -build -*.md -!README.md -``` - - - -## Workflow - -1. **Write Dockerfile** — multi-stage, pin base tags, use cache mounts. -2. **Write .dockerignore** — exclude `.git`, `.env`, `node_modules`, `__pycache__`. -3. 
**Build locally** — `docker buildx build -t myimage:dev .` -4. **Inspect** — `docker image inspect myimage:dev` for size; `dive myimage:dev` for layer breakdown. -5. **Run as non-root check** — `docker run --rm myimage:dev id` should print `uid=65532`. -6. **Compose integration** — use `compose.yml` with health checks and `depends_on` conditions. - - - - - -## Guardrails - -- **Always multi-stage** — never ship build tools, compilers, or dev dependencies in the final image. -- **Always non-root** — use `:nonroot` distroless tags or add an explicit non-root user (UID 65532). Never run as root in production. -- **Always .dockerignore** — prevents leaking `.env`, secrets, `.git`, and large directories into the build context. -- **Pin base image tags** — use full tags (`python:3.12-slim-bookworm`, not `python:latest`) to ensure reproducible builds. -- **Use BuildKit cache mounts** for all package managers to keep CI builds fast. -- **No secrets in layers** — never `COPY .env` or `RUN echo SECRET=...`. Use `--secret` mount or runtime injection. - - - - - -## Validation Checkpoint - -Before delivering Dockerfile or Compose config, verify: - -- [ ] Multi-stage build separates builder from runtime stage -- [ ] Base image tags are pinned (no `:latest`) -- [ ] `.dockerignore` is present and excludes secrets/caches -- [ ] Final image runs as non-root (UID 65532 or equivalent) -- [ ] No secrets baked into layers -- [ ] Cache mounts used for package manager steps -- [ ] Health check defined in Compose or Dockerfile for long-running services - - - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Dockerfile Patterns](references/dockerfile.md)** - - Multi-stage builds, distroless images, TARGETARCH for multi-arch, non-root users, tini init, .dockerignore, uv cache mounts. -- **[Compose](references/compose.md)** - - docker-compose.yml patterns, service dependencies, volumes, networks, health checks. 
-- **[Optimization](references/optimization.md)** - - Layer caching, BuildKit cache mounts, minimal base images, bytecode compilation, reducing image size. - ---- - - - -## Example: Multi-Stage Python Dockerfile - -```dockerfile -# Build stage -FROM python:3.12-slim-bookworm AS builder -WORKDIR /app -RUN pip install --no-cache-dir uv -COPY pyproject.toml uv.lock ./ -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen --no-dev --no-editable - -# Runtime stage -FROM gcr.io/distroless/python3-debian12:nonroot -WORKDIR /app -COPY --from=builder /app/.venv/lib/python3.12/site-packages /usr/lib/python3.12/site-packages -COPY src/ ./src/ -ENTRYPOINT ["python", "-m", "myapp"] -``` - - - ---- - -## Official References - -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Docker](https://github.com/cofin/flow/blob/main/templates/styleguides/tools/docker.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/docker/agents/openai.yaml b/plugins/flow/skills/docker/agents/openai.yaml deleted file mode 100644 index ce7ee62..0000000 --- a/plugins/flow/skills/docker/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Docker" - short_description: "Dockerfiles, Compose, BuildKit, image optimization, and container workflows" diff --git a/plugins/flow/skills/docker/references/compose.md b/plugins/flow/skills/docker/references/compose.md deleted file mode 100644 index 58787a4..0000000 --- a/plugins/flow/skills/docker/references/compose.md +++ /dev/null @@ -1,233 +0,0 @@ -# Docker Compose Patterns - -## Basic Service Definition - -```yaml -# docker-compose.yml -services: - api: - build: - context: . 
- dockerfile: Dockerfile - target: development # Multi-stage target - ports: - - "8080:8080" - environment: - DATABASE_URL: postgresql://postgres:secret@db:5432/myapp - REDIS_URL: redis://redis:6379 - depends_on: - db: - condition: service_healthy - redis: - condition: service_started - volumes: - - ./src:/app/src # Dev hot-reload - restart: unless-stopped - - db: - image: postgres:16-bookworm - environment: - POSTGRES_PASSWORD: secret - POSTGRES_DB: myapp - ports: - - "5432:5432" - volumes: - - pg-data:/var/lib/postgresql/data - - ./init.sql:/docker-entrypoint-initdb.d/init.sql - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 5s - timeout: 5s - retries: 5 - - redis: - image: redis:7-alpine - ports: - - "6379:6379" - volumes: - - redis-data:/data - -volumes: - pg-data: - redis-data: -``` - -## Networks - -```yaml -services: - frontend: - networks: - - frontend-net - - api: - networks: - - frontend-net - - backend-net - - db: - networks: - - backend-net - -networks: - frontend-net: - backend-net: - internal: true # No external access -``` - -## Environment Variables - -```yaml -services: - api: - # Inline - environment: - NODE_ENV: production - LOG_LEVEL: info - - # From file - env_file: - - .env - - .env.local - - # From host (passthrough) - environment: - - AWS_ACCESS_KEY_ID # Uses host value -``` - -## Profiles (Conditional Services) - -```yaml -services: - api: - build: . - # Always starts (no profile) - - debug-tools: - image: busybox - profiles: - - debug - - monitoring: - image: grafana/grafana - profiles: - - monitoring -``` - -```bash -# Start with specific profiles -docker compose --profile debug --profile monitoring up -``` - -## Override Files - -```yaml -# docker-compose.yml (base) -services: - api: - image: myapp:latest - ports: - - "8080:8080" - -# docker-compose.override.yml (auto-loaded in dev) -services: - api: - build: . 
- volumes: - - ./src:/app/src - environment: - DEBUG: "true" - -# docker-compose.prod.yml -services: - api: - deploy: - replicas: 3 - resources: - limits: - memory: 512M - cpus: "0.5" -``` - -```bash -# Dev (uses override automatically) -docker compose up - -# Production -docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d -``` - -## Health Checks - -```yaml -services: - api: - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s - - db: - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 5s - timeout: 5s - retries: 5 -``` - -## Secrets - -```yaml -services: - api: - secrets: - - db_password - -secrets: - db_password: - file: ./secrets/db_password.txt -``` - -## Build Arguments - -```yaml -services: - api: - build: - context: . - args: - - BUILD_VERSION=1.2.3 - - PYTHON_VERSION=3.12 - cache_from: - - myapp:cache - platforms: - - linux/amd64 - - linux/arm64 -``` - -## Common Commands - -```bash -# Start services -docker compose up -d - -# Rebuild and start -docker compose up -d --build - -# View logs -docker compose logs -f api - -# Execute command in running service -docker compose exec api sh - -# Run one-off command -docker compose run --rm api python manage.py migrate - -# Stop and remove -docker compose down - -# Stop, remove, and delete volumes -docker compose down -v -``` diff --git a/plugins/flow/skills/docker/references/dockerfile.md b/plugins/flow/skills/docker/references/dockerfile.md deleted file mode 100644 index 45037e2..0000000 --- a/plugins/flow/skills/docker/references/dockerfile.md +++ /dev/null @@ -1,159 +0,0 @@ -# Dockerfile Patterns - -## Multi-Stage Build - -```dockerfile -# Stage 1: Build -FROM golang:1.22-bookworm AS builder -WORKDIR /app -COPY go.mod go.sum ./ -RUN go mod download -COPY . . 
-RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -ldflags="-s -w" -o /app/server ./cmd/server - -# Stage 2: Runtime (distroless) -FROM gcr.io/distroless/static-debian12:nonroot -COPY --from=builder /app/server /server -EXPOSE 8080 -ENTRYPOINT ["/server"] -``` - -## Multi-Architecture Builds - -```dockerfile -# TARGETARCH is automatically set by BuildKit -ARG TARGETARCH - -FROM --platform=$BUILDPLATFORM golang:1.22 AS builder -ARG TARGETARCH -RUN GOARCH=${TARGETARCH} go build -o /app/server - -FROM gcr.io/distroless/static-debian12:nonroot -COPY --from=builder /app/server /server -ENTRYPOINT ["/server"] -``` - -```bash -# Build for multiple architectures -docker buildx build --platform linux/amd64,linux/arm64 -t myapp:latest --push . -``` - -## Distroless Base Images - -Choose the right distroless image: - -| Image | Use Case | -|-------|----------| -| `gcr.io/distroless/static-debian12` | Statically compiled (Go, Rust) | -| `gcr.io/distroless/cc-debian12` | C/C++ with dynamically linked libs | -| `gcr.io/distroless/base-debian12` | Needs glibc + libssl + openssl | -| `gcr.io/distroless/python3-debian12` | Python applications | -| `gcr.io/distroless/java21-debian12` | Java applications | - -Always use the `:nonroot` tag variant for non-root execution. - -## Python Multi-Stage with uv Cache Mount - -Use `--mount=type=cache` to persist uv's download cache across builds. This avoids re-downloading packages on every build when only source code changes. 
- -```dockerfile -FROM python:3.12-slim-bookworm AS builder -WORKDIR /app -RUN pip install --no-cache-dir uv -COPY pyproject.toml uv.lock ./ -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --frozen --no-dev --no-editable -COPY src/ ./src/ - -FROM gcr.io/distroless/python3-debian12:nonroot -WORKDIR /app -COPY --from=builder /app/.venv/lib/python3.12/site-packages /usr/lib/python3.12/site-packages -COPY --from=builder /app/src ./src -ENTRYPOINT ["python", "-m", "myapp"] -``` - -## Node.js Multi-Stage - -```dockerfile -FROM node:22-bookworm-slim AS builder -WORKDIR /app -COPY package.json package-lock.json ./ -RUN npm ci --omit=dev -COPY . . -RUN npm run build - -FROM gcr.io/distroless/nodejs22-debian12:nonroot -WORKDIR /app -COPY --from=builder /app/dist ./dist -COPY --from=builder /app/node_modules ./node_modules -EXPOSE 3000 -CMD ["dist/server.js"] -``` - -## Non-Root User - -UID 65532 is the `nonroot` user in distroless images. Align custom user IDs with this value for consistency across distroless and non-distroless images. 
- -```dockerfile -# For non-distroless images — use UID 65532 to match distroless nonroot -FROM python:3.12-slim-bookworm -RUN groupadd --gid 65532 nonroot \ - && useradd --uid 65532 --gid 65532 --no-create-home --shell /bin/false nonroot -USER nonroot -WORKDIR /app -COPY --chown=nonroot:nonroot src/ ./src/ -``` - -## Tini Init System - -Use tini to properly handle signals and reap zombie processes: - -```dockerfile -FROM python:3.12-slim-bookworm -RUN apt-get update && apt-get install -y --no-install-recommends tini && rm -rf /var/lib/apt/lists/* -ENTRYPOINT ["tini", "--"] -CMD ["python", "-m", "myapp"] -``` - -## .dockerignore - -```text -# .dockerignore -.git -.github -.venv -__pycache__ -*.pyc -node_modules -.env -.env.* -*.md -!README.md -Dockerfile -docker-compose*.yml -.dockerignore -coverage -.pytest_cache -.mypy_cache -dist -build -``` - -## Health Checks - -```dockerfile -HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ - CMD ["/app/healthcheck"] - -# Or with curl (adds curl to image) -HEALTHCHECK --interval=30s --timeout=5s --retries=3 \ - CMD curl -f http://localhost:8080/health || exit 1 -``` - -## Labels & Metadata - -```dockerfile -LABEL org.opencontainers.image.source="https://github.com/org/repo" -LABEL org.opencontainers.image.description="My application" -LABEL org.opencontainers.image.version="1.0.0" -``` diff --git a/plugins/flow/skills/docker/references/optimization.md b/plugins/flow/skills/docker/references/optimization.md deleted file mode 100644 index b518965..0000000 --- a/plugins/flow/skills/docker/references/optimization.md +++ /dev/null @@ -1,185 +0,0 @@ -# Docker Image Optimization - -## Layer Caching - -Order instructions from least to most frequently changing: - -```dockerfile -# 1. Base image (rarely changes) -FROM python:3.12-slim-bookworm - -# 2. 
System dependencies (changes occasionally) -RUN apt-get update && \ - apt-get install -y --no-install-recommends libpq-dev && \ - rm -rf /var/lib/apt/lists/* - -# 3. Application dependencies (changes when deps update) -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -# 4. Application code (changes most frequently) -COPY . . -``` - -## BuildKit Cache Mounts - -Cache package manager downloads across builds: - -```dockerfile -# syntax=docker/dockerfile:1 - -# pip cache -RUN --mount=type=cache,target=/root/.cache/pip \ - pip install -r requirements.txt - -# apt cache -RUN --mount=type=cache,target=/var/cache/apt \ - --mount=type=cache,target=/var/lib/apt/lists \ - apt-get update && apt-get install -y libpq-dev - -# Go modules cache -RUN --mount=type=cache,target=/go/pkg/mod \ - --mount=type=cache,target=/root/.cache/go-build \ - go build -o /app/server - -# npm cache -RUN --mount=type=cache,target=/root/.npm \ - npm ci --omit=dev - -# Cargo cache (Rust) -RUN --mount=type=cache,target=/usr/local/cargo/registry \ - --mount=type=cache,target=/app/target \ - cargo build --release -``` - -## Secret Mounts - -Use secrets during build without leaking them to the image: - -```dockerfile -# syntax=docker/dockerfile:1 -RUN --mount=type=secret,id=npmrc,target=/root/.npmrc \ - npm ci --omit=dev -``` - -```bash -docker build --secret id=npmrc,src=.npmrc . 
-``` - -## Minimal Base Images - -| Base Image | Size | Use Case | -|-----------|------|----------| -| `scratch` | 0 MB | Static Go/Rust binaries | -| `gcr.io/distroless/static` | ~2 MB | Static binaries needing CA certs | -| `alpine:3.19` | ~7 MB | Minimal with shell + package manager | -| `debian:bookworm-slim` | ~80 MB | Need apt + glibc | -| `python:3.12-slim-bookworm` | ~150 MB | Python apps | -| `node:22-slim` | ~200 MB | Node.js apps | - -## Reducing Image Size - -### Combine RUN Commands - -```dockerfile -# Bad: creates extra layers -RUN apt-get update -RUN apt-get install -y curl -RUN rm -rf /var/lib/apt/lists/* - -# Good: single layer, cleanup in same step -RUN apt-get update && \ - apt-get install -y --no-install-recommends curl && \ - rm -rf /var/lib/apt/lists/* -``` - -### Remove Build Dependencies - -```dockerfile -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential libffi-dev && \ - pip install --no-cache-dir -r requirements.txt && \ - apt-get purge -y build-essential libffi-dev && \ - apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* -``` - -### Python: Compile Bytecode & Strip - -```dockerfile -FROM python:3.12-slim-bookworm AS builder -COPY requirements.txt . 
-RUN pip install --no-cache-dir --prefix=/install -r requirements.txt -RUN python -m compileall /install/lib - -FROM python:3.12-slim-bookworm -COPY --from=builder /install /usr/local -# .pyc files load faster, .py source can be removed if needed -``` - -### Go: Strip Binaries - -```dockerfile -RUN CGO_ENABLED=0 go build -ldflags="-s -w" -o /app/server -# -s: strip symbol table -# -w: strip DWARF debug info -# Further compress with UPX if needed: -# RUN upx --best /app/server -``` - -## Analyzing Image Size - -```bash -# View layer sizes -docker history myapp:latest - -# Use dive for interactive layer inspection -dive myapp:latest - -# Compare image sizes -docker images --format "table {{.Repository}}:{{.Tag}}\t{{.Size}}" -``` - -## .dockerignore Best Practices - -Always include a `.dockerignore` to prevent large or sensitive files from entering the build context: - -```text -.git -.github -.venv -node_modules -__pycache__ -*.pyc -.env -.env.* -coverage -.pytest_cache -*.md -Dockerfile -docker-compose*.yml -``` - -## BuildKit Parallel Builds - -```dockerfile -# Independent stages build in parallel -FROM node:22-slim AS frontend-builder -COPY frontend/ . -RUN npm ci && npm run build - -FROM golang:1.22 AS backend-builder -COPY backend/ . -RUN go build -o /server - -FROM gcr.io/distroless/static-debian12:nonroot -COPY --from=backend-builder /server /server -COPY --from=frontend-builder /dist /static -ENTRYPOINT ["/server"] -``` - -```bash -# Enable BuildKit (default in recent Docker) -DOCKER_BUILDKIT=1 docker build . -``` diff --git a/plugins/flow/skills/duckdb/SKILL.md b/plugins/flow/skills/duckdb/SKILL.md deleted file mode 100644 index 14a526e..0000000 --- a/plugins/flow/skills/duckdb/SKILL.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: duckdb -description: "Use when writing DuckDB SQL, using .duckdb files, duckdb imports, analytical queries, CSV/Parquet/JSON ingestion, ETL pipelines, extensions, client APIs, configuration, or performance tuning." 
---- - -# DuckDB - -## Overview - -DuckDB is an in-process analytical database with rich SQL dialect, first-class support for Parquet/CSV/JSON, and client APIs for Python, Node.js, Rust, Java, R, Go, WASM, and more. It excels at OLAP workloads, local data exploration, embedded analytics, and data engineering pipelines across local and cloud data sources. - -## References Index - -For detailed guides and patterns, refer to the following documents in `references/`: - -- **[Core DuckDB](references/core.md)** - - SQL dialect highlights, data import/export, configuration, and key SQL patterns. -- **[Advanced SQL Patterns](references/sql_patterns.md)** - - QUALIFY, COLUMNS(*), EXCLUDE/REPLACE/RENAME, list comprehensions, structs, maps, PIVOT/UNPIVOT, ASOF joins, UNION BY NAME, recursive CTEs, GROUP BY ALL, SAMPLE, string slicing, lambda functions. -- **[Performance Tuning](references/performance.md)** - - EXPLAIN ANALYZE, storage inspection, pushdown optimizations, parallel execution, memory management, Parquet performance, partition pruning, bulk loading, indexing. -- **[Data Engineering](references/data_engineering.md)** - - Multi-source reads (CSV, Parquet, JSON, Excel, SQLite, PostgreSQL, MySQL), httpfs/S3/GCS/Azure, glob patterns, Delta Lake, Iceberg, partitioned output, ETL patterns, cross-database queries, secrets management. -- **[Python Client](references/python_client.md)** - - Connection management, DataFrame integration, relational API, and parameter binding. -- **[Client Connections](references/connections.md)** - - Node.js, Rust, Java/JDBC, R/dbplyr, Go, WASM, ADBC (Arrow), ODBC driver setup and usage. -- **[Key Function Reference](references/functions.md)** - - Aggregates, date/time, string, list, struct, map, spatial, and full-text search functions. -- **[Extension Development](references/extensions.md)** - - Building, testing, and distributing DuckDB C++ extensions. 
-- **[CLI](references/cli.md)** - - Interactive shell usage, dot-commands, and scripting patterns. -- **[Configuration & Administration](references/configuration.md)** - - Pragmas, SET statements, database files/WAL/checkpointing, catalog inspection, extension management, cloud credentials, .duckdbrc startup config. - ---- - - - -## Key SQL Dialect Features - -- `SELECT * EXCLUDE (col)` -- select all columns except specific ones -- `SELECT COLUMNS('pattern')` -- select columns matching a regex -- `PIVOT` / `UNPIVOT` -- built-in pivot support -- `LIST`, `STRUCT`, `MAP` nested types with full query support -- `GROUP BY ALL`, `ORDER BY ALL` -- automatic grouping/ordering -- Friendly SQL: `FROM tbl SELECT col` syntax, implicit `SELECT *` - - - ---- - -## Quick Start - - - -```python -import duckdb -con = duckdb.connect() # in-memory -result = con.sql("SELECT 42 AS answer").fetchall() -``` - -```bash -# CLI -duckdb mydb.duckdb "SELECT * FROM read_parquet('data/*.parquet')" -``` - - - ---- - -## Official References - -- DuckDB documentation: -- Python API: -- Extensions: -- CLI: -- Extension template: - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [PostgreSQL](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/postgres_psql.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. 
- diff --git a/plugins/flow/skills/duckdb/agents/openai.yaml b/plugins/flow/skills/duckdb/agents/openai.yaml deleted file mode 100644 index 9fa5228..0000000 --- a/plugins/flow/skills/duckdb/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "DuckDB" - short_description: "DuckDB SQL, analytics, ETL, extensions, clients, and performance tuning" diff --git a/plugins/flow/skills/duckdb/references/cli.md b/plugins/flow/skills/duckdb/references/cli.md deleted file mode 100644 index f407f7b..0000000 --- a/plugins/flow/skills/duckdb/references/cli.md +++ /dev/null @@ -1,105 +0,0 @@ -# DuckDB CLI - -## Interactive Shell - -```bash -# Start interactive shell (in-memory) -duckdb - -# Open or create a persistent database -duckdb my_database.duckdb - -# Read-only mode -duckdb -readonly my_database.duckdb - -# Execute a query and exit -duckdb -c "SELECT 'hello, duckdb'" -duckdb my.duckdb "SELECT COUNT(*) FROM my_table" -``` - ---- - -## Dot-Commands - -```text -.help -- Show all dot-commands -.mode -- Set output mode (csv, json, markdown, table, line, etc.) -.headers on|off -- Toggle column headers -.output FILE -- Redirect output to a file -.output -- Reset output to stdout -.read FILE -- Execute SQL from a file -.tables -- List tables -.schema TABLE -- Show CREATE statement for a table -.timer on|off -- Toggle query timer -.width N1 N2 ... 
-- Set column widths for column mode -.quit -- Exit the shell -``` - -### Output Modes - -```text -.mode csv -- Comma-separated values -.mode json -- JSON array of objects -.mode markdown -- Markdown table -.mode table -- ASCII table (default) -.mode line -- One value per line -.mode latex -- LaTeX tabular -``` - ---- - -## Scripting and Piping - -### Read SQL from a File - -```bash -duckdb my.duckdb < queries.sql -duckdb my.duckdb ".read queries.sql" -``` - -### Read from stdin - -```bash -echo "SELECT 42 AS answer;" | duckdb -cat data.csv | duckdb -c "SELECT * FROM read_csv('/dev/stdin')" -``` - -### Output Formatting for Scripts - -```bash -# CSV output -duckdb -csv -c "SELECT 1 AS a, 2 AS b" - -# JSON output -duckdb -json -c "SELECT 1 AS a, 2 AS b" - -# No headers -duckdb -noheader -c "SELECT 42" - -# Combine flags -duckdb -csv -noheader my.duckdb "SELECT name FROM users" -``` - -### Common One-Liners - -```bash -# Convert CSV to Parquet -duckdb -c "COPY (SELECT * FROM read_csv('input.csv')) TO 'output.parquet' (FORMAT PARQUET)" - -# Query remote Parquet -duckdb -c "INSTALL httpfs; LOAD httpfs; SELECT COUNT(*) FROM read_parquet('https://example.com/data.parquet')" - -# Summarize a CSV -duckdb -c "SUMMARIZE SELECT * FROM read_csv('data.csv')" - -# Describe schema -duckdb -c "DESCRIBE SELECT * FROM read_parquet('data.parquet')" -``` - ---- - -## Official Documentation - -- CLI overview: -- Dot-commands: -- Output formats: diff --git a/plugins/flow/skills/duckdb/references/configuration.md b/plugins/flow/skills/duckdb/references/configuration.md deleted file mode 100644 index 028675e..0000000 --- a/plugins/flow/skills/duckdb/references/configuration.md +++ /dev/null @@ -1,306 +0,0 @@ -# DuckDB Configuration & Administration - -## Pragmas - -```sql --- Database file size and metadata -PRAGMA database_size; - --- DuckDB version -PRAGMA version; - --- Storage layout details per column (compression, row groups, byte sizes) -PRAGMA storage_info('my_table'); - --- Table 
schema info (column names, types, nullability, defaults) -PRAGMA table_info('my_table'); - --- Show all tables -PRAGMA show_tables; - --- Show detailed table information -PRAGMA show_tables_expanded; - --- Platform and build info -PRAGMA platform; -``` - ---- - -## SET Statements - -```sql --- Memory limit (default: 80% of system RAM) -SET memory_limit = '8GB'; - --- Number of threads (default: number of CPU cores) -SET threads = 4; - --- Temp directory for spilling when memory is exceeded -SET temp_directory = '/tmp/duckdb_spill'; - --- Default sort order -SET default_order = 'ASC'; -- or 'DESC' - --- Null ordering -SET default_null_order = 'NULLS LAST'; -- or 'NULLS FIRST' - --- Progress bar for long queries -SET enable_progress_bar = true; -SET enable_progress_bar_print = true; - --- Preserve insertion order (disable for faster aggregation) -SET preserve_insertion_order = false; - --- Timezone -SET TimeZone = 'America/New_York'; - --- Query all current settings -SELECT * FROM duckdb_settings(); - --- Get a specific setting -SELECT current_setting('memory_limit'); -SELECT current_setting('threads'); -``` - ---- - -## Database Files - -### .duckdb Format - -- DuckDB uses a single file (`.duckdb`) for persistent storage -- Write-Ahead Log (WAL) is stored alongside as `.duckdb.wal` -- Automatically checkpoints WAL into the main file periodically - -### Checkpointing - -```sql --- Force a checkpoint (flush WAL to main database file) -CHECKPOINT; - --- Force checkpoint and truncate WAL -FORCE CHECKPOINT; - --- Configure automatic checkpoint threshold (bytes of WAL before auto-checkpoint) -SET wal_autocheckpoint = '256MB'; -``` - -### File Locking - -- Only one process can write to a `.duckdb` file at a time -- Multiple read-only connections are allowed -- Use `read_only=True` / `-readonly` for concurrent read access - -```sql --- Open read-only from CLI --- duckdb -readonly my.duckdb -``` - ---- - -## Catalog: Inspecting the Database - -### information_schema (SQL 
Standard) - -```sql --- All tables -SELECT table_schema, table_name, table_type -FROM information_schema.tables; - --- All columns for a table -SELECT column_name, data_type, is_nullable, column_default -FROM information_schema.columns -WHERE table_name = 'my_table'; - --- All schemata -SELECT * FROM information_schema.schemata; -``` - -### DuckDB System Functions - -```sql --- Tables with detailed metadata -SELECT * FROM duckdb_tables(); - --- Columns -SELECT * FROM duckdb_columns() WHERE table_name = 'my_table'; - --- Views -SELECT * FROM duckdb_views(); - --- Indexes -SELECT * FROM duckdb_indexes(); - --- Types (enums, structs, etc.) -SELECT * FROM duckdb_types(); - --- Dependencies between objects -SELECT * FROM duckdb_dependencies(); - --- Temporary files spilled to disk -SELECT * FROM duckdb_temporary_files(); - --- Attached databases -SELECT * FROM duckdb_databases(); - --- Schemas -SELECT * FROM duckdb_schemas(); - --- Functions (built-in and UDFs) -SELECT DISTINCT function_name FROM duckdb_functions() -WHERE function_name LIKE 'list_%'; - --- Sequences -SELECT * FROM duckdb_sequences(); - --- Constraints -SELECT * FROM duckdb_constraints() WHERE table_name = 'my_table'; -``` - ---- - -## Extension Management - -### Install and Load - -```sql --- Install an extension (downloads once) -INSTALL httpfs; - --- Load an extension (needed per session) -LOAD httpfs; - --- Install and load together (two statements; many core extensions also auto-load on first use) -INSTALL httpfs; -LOAD httpfs; - --- List installed extensions -SELECT * FROM duckdb_extensions(); - --- Check loaded vs installed -SELECT extension_name, installed, loaded FROM duckdb_extensions(); -``` - -### Auto-Install and Auto-Load - -Many core extensions auto-install and auto-load when first referenced: - -```sql --- httpfs auto-loads when accessing s3:// or https:// URLs -SELECT * FROM read_parquet('s3://bucket/data.parquet'); - --- json auto-loads for read_json -SELECT * FROM read_json('data.json'); -``` - -### Custom 
Extension Repositories - -```sql --- Use a custom repository -SET custom_extension_repository = 'https://my-extensions.example.com'; -INSTALL my_custom_extension; -LOAD my_custom_extension; - --- Install from community repository -INSTALL spatial FROM community; -``` - -### Updating Extensions - -```sql --- Update all extensions -UPDATE EXTENSIONS; - --- Force reinstall a specific extension -FORCE INSTALL httpfs; -``` - ---- - -## Cloud Access Configuration - -### httpfs (S3, GCS, HTTP) - -```sql -INSTALL httpfs; -LOAD httpfs; - --- S3 credentials via SET -SET s3_region = 'us-east-1'; -SET s3_access_key_id = 'AKIA...'; -SET s3_secret_access_key = '...'; - --- Or use secrets manager (preferred) -CREATE SECRET my_s3 ( - TYPE s3, - KEY_ID 'AKIA...', - SECRET '...', - REGION 'us-east-1' -); -``` - -### AWS Credential Chain - -```sql --- Use default AWS credential chain (env vars, ~/.aws/credentials, instance profile) -CREATE SECRET aws_creds ( - TYPE s3, - PROVIDER credential_chain -); -``` - -### GCS - -```sql --- GCS via S3-compatible endpoint -SET s3_endpoint = 'storage.googleapis.com'; - --- Or use GCS secret -CREATE SECRET gcs_creds ( - TYPE gcs, - KEY_ID '...', - SECRET '...' -); -``` - -### Azure Blob Storage - -```sql --- Azure connection string -CREATE SECRET azure_creds ( - TYPE azure, - CONNECTION_STRING 'DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;' -); -``` - ---- - -## Startup Configuration: .duckdbrc - -DuckDB reads `~/.duckdbrc` on CLI startup (similar to `.bashrc`). 
- -```sql --- ~/.duckdbrc example -.timer on -SET memory_limit = '4GB'; -SET threads = 4; -SET temp_directory = '/tmp/duckdb_spill'; -LOAD httpfs; -.mode markdown -``` - -- Each line is executed as a SQL statement or dot-command -- Only applies to CLI sessions, not programmatic connections -- Use `.duckdbrc` for personal defaults (output mode, memory, extensions) - ---- - -## Official Documentation - -- Configuration: -- Pragmas: -- Extensions: -- Secrets manager: -- information_schema: -- System catalog: diff --git a/plugins/flow/skills/duckdb/references/connections.md b/plugins/flow/skills/duckdb/references/connections.md deleted file mode 100644 index 2d1189b..0000000 --- a/plugins/flow/skills/duckdb/references/connections.md +++ /dev/null @@ -1,274 +0,0 @@ -# DuckDB Client Connections (Beyond Python) - -## Node.js - -### duckdb-node (Callback API) - -```javascript -const duckdb = require('duckdb'); -const db = new duckdb.Database(':memory:'); // or 'my.duckdb' -const con = db.connect(); - -con.all("SELECT 42 AS answer", (err, rows) => { - console.log(rows); // [{ answer: 42 }] -}); - -// Prepared statements -const stmt = con.prepare("SELECT * FROM range(?) 
AS t(i)"); -stmt.all(10, (err, rows) => console.log(rows)); -``` - -### duckdb-async (Promise-Based) - -```javascript -import { Database } from 'duckdb-async'; - -const db = await Database.create(':memory:'); -const rows = await db.all("SELECT 42 AS answer"); - -// With connection pool -const con = await db.connect(); -await con.run("CREATE TABLE t AS SELECT * FROM range(100)"); -const result = await con.all("SELECT * FROM t WHERE i > 50"); -await con.close(); -``` - -- Docs: - ---- - -## Rust - -### duckdb-rs - -```rust -use duckdb::{Connection, params}; - -let conn = Connection::open_in_memory()?; - -conn.execute_batch("CREATE TABLE t (id INTEGER, name VARCHAR)")?; - -// Prepared statement with parameters -let mut stmt = conn.prepare("INSERT INTO t VALUES (?, ?)")?; -stmt.execute(params![1, "Alice"])?; - -// Query results -let mut stmt = conn.prepare("SELECT id, name FROM t")?; -let rows = stmt.query_map([], |row| { - Ok((row.get::<_, i32>(0)?, row.get::<_, String>(1)?)) -})?; -for row in rows { - println!("{:?}", row?); -} -``` - -### Appender API (Bulk Insert) - -```rust -let conn = Connection::open_in_memory()?; -conn.execute_batch("CREATE TABLE t (id INTEGER, val DOUBLE)")?; - -let mut appender = conn.appender("t")?; -for i in 0..100_000 { - appender.append_row(params![i, i as f64 * 1.5])?; -} -appender.flush()?; -``` - -- Crate: -- Docs: - ---- - -## Java (JDBC) - -```java -import java.sql.*; - -// Add duckdb_jdbc.jar to classpath -Connection conn = DriverManager.getConnection("jdbc:duckdb:"); - -// Persistent database -Connection conn = DriverManager.getConnection("jdbc:duckdb:/path/to/my.duckdb"); - -Statement stmt = conn.createStatement(); -ResultSet rs = stmt.executeQuery("SELECT 42 AS answer"); -while (rs.next()) { - System.out.println(rs.getInt("answer")); -} - -// Prepared statements -PreparedStatement ps = conn.prepareStatement("SELECT * FROM range(?)"); -ps.setInt(1, 10); -ResultSet rs = ps.executeQuery(); -``` - -### DBeaver Integration - -1. 
Download the DuckDB JDBC driver JAR from -2. In DBeaver: Database > Driver Manager > New > set class to `org.duckdb.DuckDBDriver` -3. Create a new connection with URL `jdbc:duckdb:/path/to/database.duckdb` - -- Docs: - ---- - -## R - -```r -library(DBI) -library(duckdb) - -# In-memory connection -con <- dbConnect(duckdb()) - -# Persistent database -con <- dbConnect(duckdb(), dbdir = "my.duckdb") - -# Query -result <- dbGetQuery(con, "SELECT 42 AS answer") - -# Register a data frame as a virtual table -duckdb_register(con, "my_df", my_dataframe) -dbGetQuery(con, "SELECT * FROM my_df WHERE x > 10") - -# dbplyr integration (dplyr verbs translate to SQL) -library(dbplyr) -tbl(con, "my_table") %>% - filter(year == 2025) %>% - group_by(region) %>% - summarise(total = sum(sales)) %>% - collect() - -dbDisconnect(con, shutdown = TRUE) -``` - -- Docs: - ---- - -## Go - -### go-duckdb (CGo-Based) - -```go -package main - -import ( - "database/sql" - "fmt" - _ "github.com/marcboeker/go-duckdb" -) - -func main() { - db, _ := sql.Open("duckdb", "") // in-memory - // db, _ := sql.Open("duckdb", "my.duckdb") // persistent - - db.Exec("CREATE TABLE t AS SELECT * FROM range(10) AS t(i)") - - rows, _ := db.Query("SELECT * FROM t WHERE i > 5") - defer rows.Close() - for rows.Next() { - var i int - rows.Scan(&i) - fmt.Println(i) - } -} -``` - -- Docs: - ---- - -## WASM (Browser-Based) - -### duckdb-wasm - -```javascript -import * as duckdb from '@duckdb/duckdb-wasm'; - -const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles(); -const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES); - -const worker = new Worker(bundle.mainWorker); -const logger = new duckdb.ConsoleLogger(); -const db = new duckdb.AsyncDuckDB(logger, worker); -await db.instantiate(bundle.mainModule); - -const conn = await db.connect(); -const result = await conn.query("SELECT 42 AS answer"); -console.log(result.toArray()); - -// Register a file from URL -await db.registerFileURL('data.parquet', 
'https://example.com/data.parquet'); -const r = await conn.query("SELECT * FROM read_parquet('data.parquet')"); -``` - -- Package: -- Docs: - ---- - -## ADBC (Arrow Database Connectivity) - -Zero-copy data exchange using Apache Arrow format. - -```python -import adbc_driver_duckdb.dbapi as duckdb_adbc - -conn = duckdb_adbc.connect() -with conn.cursor() as cur: - cur.execute("SELECT * FROM range(1000000)") - # Fetch the full result as a pyarrow.Table (Arrow-native, no row conversion) - reader = cur.fetch_arrow_table() -``` - -```python -# Use with pandas via ADBC -import pandas as pd -df = pd.read_sql("SELECT * FROM my_table", conn) -``` - -- Docs: - ---- - -## ODBC Driver - -### Setup - -1. Download the DuckDB ODBC driver from -2. Configure DSN in `odbc.ini`: - -```ini -[DuckDB] -Driver = /path/to/libduckdb_odbc.so -Database = /path/to/my.duckdb -``` - -1. Configure driver in `odbcinst.ini`: - -```ini -[DuckDB Driver] -Driver = /path/to/libduckdb_odbc.so -``` - -### Usage - -Works with any ODBC-compatible tool (Excel, Power BI, Tableau, etc.) using the DSN name `DuckDB`. - -- Docs: - ---- - -## Official Documentation - -- All client APIs: -- Node.js: -- Rust: -- Java/JDBC: -- R: -- Go: -- WASM: -- ADBC: -- ODBC: diff --git a/plugins/flow/skills/duckdb/references/core.md b/plugins/flow/skills/duckdb/references/core.md deleted file mode 100644 index 8895465..0000000 --- a/plugins/flow/skills/duckdb/references/core.md +++ /dev/null @@ -1,189 +0,0 @@ -# Core DuckDB Usage - -## SQL Dialect Highlights - -DuckDB extends standard SQL with productivity features for analytical workloads. 
- -### Column Selection - -```sql --- Exclude specific columns -SELECT * EXCLUDE (sensitive_col, internal_id) FROM users; - --- Select columns matching a pattern -SELECT COLUMNS('revenue_.*') FROM quarterly_report; - --- Apply a function to matching columns -SELECT MIN(COLUMNS('price_.*')) FROM products; -``` - -### PIVOT / UNPIVOT - -```sql --- Pivot rows to columns -PIVOT sales ON product_name USING SUM(amount) GROUP BY region; - --- Unpivot columns to rows -UNPIVOT monthly_data ON jan, feb, mar INTO NAME month VALUE revenue; -``` - -### Nested Types - -```sql --- Lists -SELECT [1, 2, 3] AS my_list; -SELECT list_aggregate([1, 2, 3], 'sum'); - --- Structs -SELECT {'name': 'Alice', 'age': 30} AS person; -SELECT person.name FROM (SELECT {'name': 'Alice'} AS person); - --- Maps -SELECT MAP {'key1': 'value1', 'key2': 'value2'}; -``` - -### Friendly SQL - -```sql --- FROM-first syntax -FROM my_table SELECT col1, col2 WHERE col1 > 10; - --- Implicit SELECT * -FROM my_table; - --- GROUP BY ALL / ORDER BY ALL -SELECT region, product, SUM(sales) FROM data GROUP BY ALL ORDER BY ALL; -``` - ---- - -## Data Import - -### CSV - -```sql --- Auto-detect schema -SELECT * FROM read_csv('data.csv'); -SELECT * FROM read_csv('data/*.csv'); -- glob patterns - --- With options -SELECT * FROM read_csv('data.csv', header=true, delim='|', dateformat='%Y-%m-%d'); - --- Create table from CSV -CREATE TABLE my_table AS SELECT * FROM read_csv('data.csv'); -``` - -### Parquet - -```sql --- Read Parquet (local or remote) -SELECT * FROM read_parquet('data.parquet'); -SELECT * FROM read_parquet('s3://bucket/data/*.parquet'); -- requires httpfs/aws - --- Hive-partitioned datasets -SELECT * FROM read_parquet('data/**/*.parquet', hive_partitioning=true); -``` - -### JSON - -```sql -SELECT * FROM read_json('data.json'); -SELECT * FROM read_json('data.ndjson', format='newline_delimited'); -``` - -### Remote Files (httpfs) - -```sql -INSTALL httpfs; -LOAD httpfs; - --- HTTP(S) sources -SELECT * FROM 
read_parquet('https://example.com/data.parquet'); - --- S3 -SET s3_region = 'us-east-1'; -SET s3_access_key_id = 'key'; -SET s3_secret_access_key = 'secret'; -SELECT * FROM read_parquet('s3://bucket/path/file.parquet'); -``` - ---- - -## Data Export - -```sql --- CSV -COPY my_table TO 'output.csv' (HEADER, DELIMITER ','); - --- Parquet -COPY my_table TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD); - --- JSON -COPY my_table TO 'output.json' (FORMAT JSON); - --- Partitioned export -COPY my_table TO 'output' (FORMAT PARQUET, PARTITION_BY (year, month)); -``` - ---- - -## Configuration - -```sql --- Memory and threading -SET memory_limit = '4GB'; -SET threads = 4; - --- Progress bar -SET enable_progress_bar = true; - --- Preserve insertion order (default true; set false for performance) -SET preserve_insertion_order = false; - --- Check current settings -SELECT * FROM duckdb_settings(); -``` - ---- - -## Key SQL Patterns - -### Common Table Expressions (CTEs) - -```sql -WITH monthly AS ( - SELECT date_trunc('month', created_at) AS month, SUM(amount) AS total - FROM orders - GROUP BY ALL -) -SELECT month, total, total - LAG(total) OVER (ORDER BY month) AS delta -FROM monthly; -``` - -### Window Functions - -```sql -SELECT - name, - department, - salary, - RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank, - salary - AVG(salary) OVER (PARTITION BY department) AS vs_dept_avg -FROM employees; -``` - -### Temporary Tables - -```sql -CREATE TEMP TABLE staging AS -SELECT * FROM read_csv('raw_data.csv') WHERE quality_flag = 'GOOD'; -``` - ---- - -## Official Documentation - -- SQL reference: -- Data import: -- Configuration: -- Functions: diff --git a/plugins/flow/skills/duckdb/references/data_engineering.md b/plugins/flow/skills/duckdb/references/data_engineering.md deleted file mode 100644 index 7360898..0000000 --- a/plugins/flow/skills/duckdb/references/data_engineering.md +++ /dev/null @@ -1,318 +0,0 @@ -# DuckDB Data Engineering Patterns - 
-## Reading from Multiple Sources - -### CSV - -```sql --- Auto-detect schema, delimiters, headers -SELECT * FROM read_csv('data.csv'); - --- Explicit options -SELECT * FROM read_csv('data.csv', - header=true, - delim='|', - dateformat='%Y-%m-%d', - null_padding=true, - ignore_errors=true -); - --- Multiple files via glob -SELECT * FROM read_csv('data/*.csv', union_by_name=true); -``` - -### Parquet - -```sql -SELECT * FROM read_parquet('data.parquet'); -SELECT * FROM read_parquet('data/**/*.parquet', hive_partitioning=true); -SELECT * FROM read_parquet(['file1.parquet', 'file2.parquet']); -``` - -### JSON - -```sql --- Standard JSON -SELECT * FROM read_json('data.json'); - --- Newline-delimited JSON -SELECT * FROM read_json('logs/*.ndjson', format='newline_delimited'); - --- With explicit schema -SELECT * FROM read_json('data.json', - columns={id: 'INTEGER', name: 'VARCHAR', tags: 'VARCHAR[]'} -); -``` - -### Excel - -```sql -INSTALL spatial; -- includes Excel reader -LOAD spatial; -SELECT * FROM st_read('data.xlsx', layer='Sheet1'); -``` - -### SQLite - -```sql -INSTALL sqlite; -LOAD sqlite; -SELECT * FROM sqlite_scan('my_database.sqlite', 'my_table'); - --- Or attach the whole database -ATTACH 'my_database.sqlite' AS sqlite_db (TYPE sqlite); -SELECT * FROM sqlite_db.my_table; -``` - -### PostgreSQL - -```sql -INSTALL postgres; -LOAD postgres; - -ATTACH 'postgresql://user:pass@host:5432/dbname' AS pg (TYPE postgres); -SELECT * FROM pg.public.customers WHERE region = 'US'; -``` - -### MySQL - -```sql -INSTALL mysql; -LOAD mysql; - -ATTACH 'mysql://user:pass@host:3306/dbname' AS mysql_db (TYPE mysql); -SELECT * FROM mysql_db.orders LIMIT 100; -``` - ---- - -## httpfs Extension: Remote File Access - -```sql -INSTALL httpfs; -LOAD httpfs; - --- HTTP/HTTPS -SELECT * FROM read_parquet('https://example.com/data.parquet'); - --- S3 -SET s3_region = 'us-east-1'; -SET s3_access_key_id = 'AKIA...'; -SET s3_secret_access_key = '...'; -SELECT * FROM 
read_parquet('s3://my-bucket/data/*.parquet'); - --- GCS (via S3-compatible endpoint) -SET s3_endpoint = 'storage.googleapis.com'; -SET s3_access_key_id = '...'; -SET s3_secret_access_key = '...'; -SELECT * FROM read_parquet('s3://gcs-bucket/data.parquet'); - --- Azure Blob Storage -SET azure_storage_connection_string = '...'; -SELECT * FROM read_parquet('azure://container/path/data.parquet'); -``` - ---- - -## Glob Patterns - -```sql --- All Parquet files in a directory -SELECT * FROM read_parquet('data/*.parquet'); - --- Recursive glob -SELECT * FROM read_parquet('data/**/*.parquet'); - --- Multiple specific files -SELECT * FROM read_parquet(['2024/q1.parquet', '2024/q2.parquet']); - --- Glob with filename column for tracking source -SELECT *, filename FROM read_parquet('data/**/*.parquet', filename=true); - --- CSV glob with union by name (handles schema differences) -SELECT * FROM read_csv('reports/*.csv', union_by_name=true); -``` - ---- - -## Delta Lake and Iceberg Support - -### Delta Lake - -```sql -INSTALL delta; -LOAD delta; - --- Read a Delta table -SELECT * FROM delta_scan('s3://bucket/delta-table/'); - --- Time travel -SELECT * FROM delta_scan('s3://bucket/delta-table/', version=5); -``` - -### Iceberg - -```sql -INSTALL iceberg; -LOAD iceberg; - --- Read an Iceberg table -SELECT * FROM iceberg_scan('s3://bucket/iceberg-table/'); - --- Inspect snapshots -SELECT * FROM iceberg_snapshots('s3://bucket/iceberg-table/'); -``` - ---- - -## Writing Output - -```sql --- CSV -COPY my_table TO 'output.csv' (HEADER, DELIMITER ','); - --- Parquet with compression -COPY my_table TO 'output.parquet' (FORMAT PARQUET, COMPRESSION ZSTD); - --- JSON (newline-delimited) -COPY my_table TO 'output.ndjson' (FORMAT JSON); - --- Partitioned output -COPY my_table TO 'output/' ( - FORMAT PARQUET, - PARTITION_BY (year, month), - OVERWRITE_OR_IGNORE true, - COMPRESSION ZSTD -); - --- Write query results directly -COPY ( - SELECT region, SUM(sales) AS total - FROM orders - GROUP 
BY region -) TO 'summary.parquet' (FORMAT PARQUET); -``` - ---- - -## ETL Patterns: Source to Sink in a Single Query - -```sql --- CSV to Parquet conversion -COPY (SELECT * FROM read_csv('raw/*.csv')) -TO 'processed/' (FORMAT PARQUET, PARTITION_BY (date)); - --- Aggregate and export in one step -COPY ( - SELECT - date_trunc('day', event_time) AS day, - event_type, - COUNT(*) AS cnt, - COUNT(DISTINCT user_id) AS unique_users - FROM read_parquet('s3://bucket/events/**/*.parquet', hive_partitioning=true) - WHERE event_time >= '2025-01-01' - GROUP BY ALL -) TO 's3://bucket/aggregates/daily.parquet' (FORMAT PARQUET); - --- Join multiple sources and sink -COPY ( - SELECT o.*, c.name, c.segment - FROM read_parquet('orders.parquet') o - JOIN read_csv('customers.csv') c ON o.customer_id = c.id - WHERE o.status = 'completed' -) TO 'enriched_orders.parquet' (FORMAT PARQUET); -``` - ---- - -## Attaching External Databases - -```sql --- PostgreSQL -ATTACH 'postgresql://user:pass@host:5432/mydb' AS pg (TYPE postgres); - --- MySQL -ATTACH 'mysql://user:pass@host:3306/mydb' AS mysql_db (TYPE mysql); - --- SQLite -ATTACH 'path/to/database.sqlite' AS sqlite_db (TYPE sqlite); - --- List attached databases -SELECT * FROM duckdb_databases(); - --- Detach when done -DETACH pg; -``` - ---- - -## Cross-Database Queries - -```sql --- Join a local Parquet file with a PostgreSQL table -ATTACH 'postgresql://user:pass@host/db' AS pg (TYPE postgres); - -SELECT p.*, pg.public.customer_segments.segment -FROM read_parquet('purchases.parquet') p -JOIN pg.public.customer_segments - ON p.customer_id = pg.public.customer_segments.customer_id; - --- Join SQLite data with CSV -ATTACH 'legacy.sqlite' AS legacy (TYPE sqlite); - -SELECT l.*, c.* -FROM legacy.orders l -JOIN read_csv('new_products.csv') c ON l.product_id = c.id; -``` - ---- - -## Secrets Management - -```sql --- Create a secret for AWS S3 access -CREATE SECRET my_s3_secret ( - TYPE s3, - KEY_ID 'AKIA...', - SECRET '...', - REGION 'us-east-1' 
-); - --- GCS secret -CREATE SECRET my_gcs_secret ( - TYPE gcs, - KEY_ID '...', - SECRET '...' -); - --- Azure secret -CREATE SECRET my_azure_secret ( - TYPE azure, - CONNECTION_STRING '...' -); - --- List secrets (values are redacted) -SELECT * FROM duckdb_secrets(); - --- Drop a secret -DROP SECRET my_s3_secret; - --- Secrets persist for the session; use CREATE PERSISTENT SECRET for persistent databases -CREATE PERSISTENT SECRET prod_s3 ( - TYPE s3, - KEY_ID 'AKIA...', - SECRET '...', - REGION 'us-west-2' -); -``` - ---- - -## Official Documentation - -- Data import: -- httpfs extension: -- Delta extension: -- Iceberg extension: -- PostgreSQL scanner: -- Secrets manager: diff --git a/plugins/flow/skills/duckdb/references/extensions.md b/plugins/flow/skills/duckdb/references/extensions.md deleted file mode 100644 index 0e18738..0000000 --- a/plugins/flow/skills/duckdb/references/extensions.md +++ /dev/null @@ -1,144 +0,0 @@ -# DuckDB Extension Development - -## Overview - -Use this reference to implement and maintain DuckDB extensions with a predictable workflow: configure/build, run DuckDB SQL tests, keep extension metadata/versioning aligned, and ship binaries through CI. - -## Workflow - -1. Confirm extension wiring. -2. Build with the repo's supported flow. -3. Run unit/integration SQL tests. -4. Validate extension packaging/distribution config. -5. Prepare release artifacts and compatibility notes. - ---- - -## 1) Confirm Wiring First - -Check these files before changing code: - -1. `extension_config.cmake` to ensure `duckdb_extension_load( ...)` is correct. -2. `CMakeLists.txt` to verify sources, include paths, and third-party link logic. -3. `Makefile` and CI makefiles for canonical build/test targets. -4. `test/unit_tests/*.test` and `test/integration_tests/*.test` for expected behavior. - ---- - -## 2) Follow DuckDB Extension Build Conventions - -Prefer the extension-template/extension-ci-tools pattern: - -1. 
Keep extension metadata in `extension_config.cmake`. -2. Use CMake for both loadable and static extension targets where needed. -3. Keep platform-specific dependency resolution explicit. -4. Avoid one-off local scripts for core build orchestration when existing make/CI targets already define behavior. - -### extension_config.cmake - -```cmake -duckdb_extension_load(my_extension - SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} - LOAD_TESTS -) -``` - -### CMakeLists.txt Pattern - -```cmake -cmake_minimum_required(VERSION 3.12) -set(EXTENSION_NAME my_extension) -project(${EXTENSION_NAME}) - -include_directories(src/include) - -set(EXTENSION_SOURCES - src/my_extension.cpp - src/my_functions.cpp -) - -build_static_extension(${EXTENSION_NAME} ${EXTENSION_SOURCES}) -build_loadable_extension(${EXTENSION_NAME} " " ${EXTENSION_SOURCES}) -``` - ---- - -## 3) Testing Best Practices - -1. Run fast unit tests first (no external DB dependency). -2. Run integration tests in containerized environments for external systems. -3. Keep SQL logic tests close to DuckDB's `sqllogictest` style and isolate regressions with focused files. -4. When adding pushdown/type behavior, add direct tests for predicate/projection/type mapping, not only broad end-to-end tests. - -### SQL Logic Test Pattern - -```text -# test/sql/my_extension/basic.test - -# name: test/sql/my_extension/basic.test -# description: Test basic my_extension functionality - -require my_extension - -statement ok -SELECT my_function('hello'); - -query I -SELECT my_function('world'); ----- -expected_result -``` - ---- - -## 4) Local Evaluation Checklist - -When evaluating a DuckDB extension project: - -1. Build wiring looks correct: `extension_config.cmake` loads the extension name. -2. CMake integrates external dependency detection and links required libraries in both static/loadable targets. -3. Build/test ergonomics are good: `Makefile` has `release`, `test`, `integration`, `tidy-check`, `configure_ci`. -4. 
Release process is documented (e.g., `docs/RELEASE.md`) with tag-driven GitHub Actions flow. -5. Consider documenting signed-extension path and long-term upgrade strategy against DuckDB release cadence. - ---- - -## 5) Distribution and CI - -### GitHub Actions Pattern - -- Use `duckdb/extension-ci-tools` for standardized CI workflows. -- Tag-driven releases produce platform-specific binaries. -- Test across the DuckDB version matrix supported by the extension. - -### Installing Extensions - -```sql --- From community repository -INSTALL my_extension FROM community; -LOAD my_extension; - --- From custom repository -SET custom_extension_repository = 'https://my-repo.example.com'; -INSTALL my_extension; -``` - ---- - -## DuckDB Best-Practice Guardrails - -1. Keep extension changes aligned to the targeted DuckDB branch/version matrix. -2. Prefer reproducible CI/container builds over host-specific assumptions. -3. Keep compatibility notes explicit for each DuckDB version line. -4. Test both functional correctness and extension loading/install experience. 
- ---- - -## Official References - -- DuckDB extension template: -- Extension distribution overview: -- Extension distribution: -- Community extensions: -- Extension CI tools: -- C API extension loading reference: diff --git a/plugins/flow/skills/duckdb/references/functions.md b/plugins/flow/skills/duckdb/references/functions.md deleted file mode 100644 index e914d01..0000000 --- a/plugins/flow/skills/duckdb/references/functions.md +++ /dev/null @@ -1,322 +0,0 @@ -# DuckDB Key Function Reference - -## Aggregate Functions - -```sql --- arg_min / arg_max: return the value of one column at the row where another is min/max -SELECT arg_min(name, salary) AS lowest_paid, - arg_max(name, salary) AS highest_paid -FROM employees; - --- list: collect values into a list -SELECT department, list(name ORDER BY name) AS members -FROM employees GROUP BY department; - --- string_agg: concatenate strings with separator -SELECT department, string_agg(name, ', ' ORDER BY name) AS names -FROM employees GROUP BY department; - --- approx_count_distinct: HyperLogLog approximate distinct count -SELECT approx_count_distinct(user_id) AS approx_users FROM events; - --- reservoir_quantile: approximate quantile computed via reservoir sampling -SELECT reservoir_quantile(value, 0.5) FROM measurements; - --- Quantile functions -SELECT quantile_cont(salary, 0.5) AS median, - quantile_cont(salary, [0.25, 0.5, 0.75]) AS quartiles -FROM employees; - --- bitstring_agg: aggregate into bitstring -SELECT bitstring_agg(flag) FROM my_flags; - --- kurtosis, skewness, entropy -SELECT kurtosis(value), skewness(value), entropy(category) -FROM measurements; - --- first / last (with ordering) -SELECT first(price ORDER BY ts), last(price ORDER BY ts) -FROM trades WHERE symbol = 'AAPL'; -``` - ---- - -## Date/Time Functions - -```sql --- date_trunc: truncate to specified precision -SELECT date_trunc('month', TIMESTAMP '2025-03-15 10:30:00'); --- 2025-03-01 00:00:00 - --- date_part / extract: get component -SELECT date_part('year', DATE 
'2025-03-15'); -SELECT extract(dow FROM DATE '2025-03-15'); -- day of week - --- date_diff: difference between dates -SELECT date_diff('day', DATE '2025-01-01', DATE '2025-03-15'); --- 73 - --- date_add / date_sub -SELECT DATE '2025-01-01' + INTERVAL 30 DAY; -SELECT date_add(TIMESTAMP '2025-01-01', INTERVAL 3 MONTH); - --- generate_series: create date/time ranges -SELECT * FROM generate_series(DATE '2025-01-01', DATE '2025-12-31', INTERVAL 1 MONTH); - --- epoch conversions -SELECT epoch(TIMESTAMP '2025-01-01 00:00:00'); -- to epoch seconds -SELECT epoch_ms(1735689600000); -- from epoch milliseconds -SELECT make_timestamp(2025, 1, 15, 10, 30, 0); -- from components - --- strftime / strptime: format / parse -SELECT strftime(CURRENT_TIMESTAMP, '%Y-%m-%d %H:%M'); -SELECT strptime('2025-03-15', '%Y-%m-%d'); - --- age: human-readable interval between dates -SELECT age(DATE '2025-03-15', DATE '2020-01-01'); --- 5 years 2 months 14 days - --- Current date/time -SELECT current_date, current_timestamp, now(); - --- Timezone handling -SELECT TIMESTAMP '2025-01-01 12:00:00' AT TIME ZONE 'America/New_York'; -``` - ---- - -## String Functions - -```sql --- regexp_extract: capture group from regex -SELECT regexp_extract('order-12345-US', 'order-(\d+)-(\w+)', 1); --- '12345' - --- regexp_replace: substitute with regex -SELECT regexp_replace('2025-03-15', '(\d{4})-(\d{2})-(\d{2})', '\2/\3/\1'); --- '03/15/2025' - --- regexp_matches: check if pattern matches -SELECT regexp_matches('hello123', '\d+'); -- true - --- string_split: split into list -SELECT string_split('a,b,c', ','); --- ['a', 'b', 'c'] - --- string_split_regex -SELECT string_split_regex('one two three', '\s+'); - --- format: printf-style formatting -SELECT format('{} has {} items', 'Cart', 42); -SELECT printf('%.2f%%', 99.1); - --- Padding and trimming -SELECT lpad('42', 5, '0'); -- '00042' -SELECT trim(' hello '); -- 'hello' -SELECT ltrim('xxhello', 'x'); -- 'hello' - --- contains, prefix, suffix -SELECT 
contains('hello world', 'world'); -- true -SELECT starts_with('hello', 'hel'); -- true -SELECT suffix('filename.parquet', '.parquet'); -- true - --- repeat, reverse, replace -SELECT repeat('ab', 3); -- 'ababab' -SELECT reverse('hello'); -- 'olleh' -SELECT replace('foo bar', ' ', '_'); -- 'foo_bar' - --- length, position -SELECT length('hello'); -- 5 -SELECT position('world' IN 'hello world'); -- 7 - --- ASCII / Unicode -SELECT ascii('A'); -- 65 -SELECT chr(65); -- 'A' -SELECT unicode('A'); -- 65 -``` - ---- - -## List Functions - -```sql --- list_transform: apply lambda to each element -SELECT list_transform([1, 2, 3], x -> x * 10); --- [10, 20, 30] - --- list_filter: keep elements matching predicate -SELECT list_filter(['apple', 'banana', 'avocado'], s -> s[1] = 'a'); --- ['apple', 'avocado'] - --- list_reduce: fold to single value -SELECT list_reduce([1, 2, 3, 4], (acc, x) -> acc + x); --- 10 - --- list_sort / list_reverse_sort -SELECT list_sort([3, 1, 2]); -- [1, 2, 3] -SELECT list_reverse_sort([3, 1, 2]); -- [3, 2, 1] - --- flatten: collapse nested lists -SELECT flatten([[1, 2], [3, 4]]); --- [1, 2, 3, 4] - --- unnest: expand list into rows -SELECT unnest([1, 2, 3]) AS val; --- Returns 3 rows - --- list_aggregate: apply aggregate function to list -SELECT list_aggregate([1, 2, 3], 'sum'); -- 6 -SELECT list_aggregate([1, 2, 3], 'avg'); -- 2.0 - --- list_distinct / list_unique -SELECT list_distinct([1, 2, 2, 3]); -- [1, 2, 3] - --- list_contains / list_has_any / list_has_all -SELECT list_contains([1, 2, 3], 2); -- true -SELECT list_has_any([1, 2, 3], [2, 4]); -- true -SELECT list_has_all([1, 2, 3], [1, 2]); -- true - --- list_concat / list_append / list_prepend -SELECT list_concat([1, 2], [3, 4]); -- [1, 2, 3, 4] -SELECT list_append([1, 2], 3); -- [1, 2, 3] -SELECT list_prepend(0, [1, 2]); -- [0, 1, 2] - --- array_length / list_count (alias len) -SELECT len([1, 2, 3]); -- 3 - --- generate_series as list -SELECT list(generate_series) FROM generate_series(1, 5); 
-``` - ---- - -## Struct Functions - -```sql --- struct_pack: create a struct -SELECT struct_pack(name := 'Alice', age := 30); - --- struct_extract: get field by name -SELECT struct_extract({'x': 1, 'y': 2}, 'x'); --- 1 - --- Dot notation access -SELECT s.name FROM (SELECT {'name': 'Alice', 'age': 30} AS s); - --- row(): positional struct constructor -SELECT row(1, 'hello', 3.14); - --- struct_insert: add or overwrite fields -SELECT struct_insert({'a': 1, 'b': 2}, c := 3); --- {'a': 1, 'b': 2, 'c': 3} - --- unnest struct into columns -SELECT unnest({'x': 1, 'y': 2, 'z': 3}); --- Returns columns x, y, z -``` - ---- - -## Map Functions - -```sql --- map: create a map -SELECT map([1, 2], ['a', 'b']); --- {1=a, 2=b} - --- MAP literal -SELECT MAP {'key1': 'value1', 'key2': 'value2'}; - --- map_from_entries: from list of key-value structs -SELECT map_from_entries([('a', 1), ('b', 2)]); - --- Element access -SELECT m['key1'] FROM (SELECT MAP {'key1': 10} AS m); - --- map_keys / map_values -SELECT map_keys(MAP {'a': 1, 'b': 2}); -- ['a', 'b'] -SELECT map_values(MAP {'a': 1, 'b': 2}); -- [1, 2] - --- map_entries: convert back to list of structs -SELECT map_entries(MAP {'a': 1, 'b': 2}); - --- cardinality: number of entries -SELECT cardinality(MAP {'a': 1, 'b': 2}); -- 2 - --- map_contains_key (element_at returns NULL if missing) -SELECT element_at(MAP {'a': 1}, 'b'); -- NULL -``` - ---- - -## Spatial Functions (spatial Extension) - -```sql -INSTALL spatial; -LOAD spatial; - --- Create geometry -SELECT ST_Point(40.7128, -74.0060) AS nyc; - --- Distance (in degrees for geographic, meters for projected) -SELECT ST_Distance( - ST_Point(40.7128, -74.0060), - ST_Point(34.0522, -118.2437) -); - --- Contains / Within -SELECT ST_Within( - ST_Point(40.7128, -74.0060), - ST_Buffer(ST_Point(40.71, -74.01), 0.01) -); - --- Read spatial files -SELECT * FROM ST_Read('boundaries.geojson'); -SELECT * FROM ST_Read('parcels.shp'); - --- Area and length -SELECT ST_Area(geom), ST_Length(geom) 
FROM spatial_table; - --- Transform coordinate reference systems -SELECT ST_Transform(geom, 'EPSG:4326', 'EPSG:3857') FROM my_table; -``` - ---- - -## Full-Text Search (fts Extension) - -```sql -INSTALL fts; -LOAD fts; - --- Create full-text index -PRAGMA create_fts_index('documents', 'doc_id', 'title', 'body'); - --- Search with BM25 scoring -SELECT doc_id, title, score -FROM ( - SELECT *, fts_main_documents.match_bm25(doc_id, 'search query') AS score - FROM documents -) -WHERE score IS NOT NULL -ORDER BY score DESC; - --- Stemming -SELECT stem('running', 'english'); --- 'run' - --- Drop index -PRAGMA drop_fts_index('documents'); -``` - ---- - -## Official Documentation - -- All functions: -- Aggregate functions: -- Date functions: -- String functions: -- List functions: -- Spatial extension: -- Full-text search: diff --git a/plugins/flow/skills/duckdb/references/performance.md b/plugins/flow/skills/duckdb/references/performance.md deleted file mode 100644 index b4c1e9e..0000000 --- a/plugins/flow/skills/duckdb/references/performance.md +++ /dev/null @@ -1,230 +0,0 @@ -# DuckDB Performance Tuning - -## EXPLAIN ANALYZE - -Read query plans to identify bottlenecks. - -```sql --- Logical plan (what DuckDB will do) -EXPLAIN SELECT region, SUM(sales) FROM orders GROUP BY region; - --- Physical plan with execution stats (how it actually ran) -EXPLAIN ANALYZE SELECT region, SUM(sales) FROM orders GROUP BY region; -``` - -### Reading the Plan - -Key physical operators to look for: - -- **SEQ_SCAN** — full table/file scan; check cardinality vs. expected rows -- **FILTER** — row filtering; if high input vs. low output, check if pushdown is possible -- **HASH_GROUP_BY** — grouping; watch for memory spills to disk -- **HASH_JOIN** — join execution; note build vs. probe side cardinalities -- **PROJECTION** — column selection; should appear early if pushdown works -- **PARQUET_SCAN** — Parquet reader; shows row group pruning stats - -Timing is shown per operator. 
Look for operators consuming disproportionate time. - ---- - -## Storage Inspection - -```sql --- Database file size information -PRAGMA database_size; - --- Detailed storage info per column (compression, row groups, sizes) -PRAGMA storage_info('my_table'); - --- Table metadata -PRAGMA table_info('my_table'); - --- Show all settings and their current values -SELECT * FROM duckdb_settings(); -``` - ---- - -## Projection and Filter Pushdown - -DuckDB automatically pushes projections and filters down into scans. - -```sql --- Only 2 columns read from Parquet, filter applied at scan level -SELECT name, salary -FROM read_parquet('employees.parquet') -WHERE department = 'Engineering'; -``` - -Verify with `EXPLAIN`: the `PARQUET_SCAN` should show the filter and only selected columns. - -**When pushdown fails:** - -- Filters on computed expressions may not push down -- Complex UDFs in WHERE prevent pushdown -- Some external scanners (postgres_scanner) have limited pushdown support - ---- - -## Parallel Execution - -```sql --- Check current thread count -SELECT current_setting('threads'); - --- Set thread count -SET threads = 8; - --- DuckDB parallelizes by pipeline: each stage runs across threads --- Use EXPLAIN ANALYZE to see which pipelines ran in parallel -``` - -### Pipeline Parallelism - -DuckDB splits queries into pipelines separated by pipeline breakers (hash joins, aggregations). Within a pipeline, work is split across threads by data partitions. 
- -**Tips:** - -- More threads help most with large scans and aggregations -- Small queries may not benefit from high thread counts (overhead) -- Joins: the smaller table is typically the build side (hash table) - ---- - -## Memory Management - -```sql --- Set memory limit (default: 80% of system RAM) -SET memory_limit = '4GB'; - --- Set temp directory for spilling (when memory is exceeded) -SET temp_directory = '/tmp/duckdb_swap'; - --- Check current memory usage -PRAGMA database_size; - --- Preserve insertion order (disable for better performance on large aggregations) -SET preserve_insertion_order = false; -``` - -**Spilling behavior:** When aggregation or join hash tables exceed memory_limit, DuckDB spills intermediate results to the temp directory. This is automatic but slower than in-memory execution. - ---- - -## Parquet Performance - -### Row Group Pruning - -Parquet files contain row groups with min/max statistics per column. DuckDB skips entire row groups that cannot match the filter. 
- -```sql --- This benefits from row group pruning if Parquet is sorted by date -SELECT * FROM read_parquet('events.parquet') -WHERE event_date BETWEEN '2025-01-01' AND '2025-01-31'; -``` - -**Best practices:** - -- Sort Parquet files by commonly filtered columns before writing -- Use ZSTD compression for best size/speed tradeoff -- Larger row groups (default 122,880 rows) improve scan throughput -- Smaller row groups improve pruning granularity - -### Predicate Pushdown into Parquet - -```sql --- Filter and projection both pushed into Parquet scan -SELECT user_id, event_type -FROM read_parquet('events/*.parquet') -WHERE event_type = 'purchase' AND event_date > '2025-06-01'; -``` - ---- - -## Partitioned Datasets - -### Hive Partitioning - -```sql --- Read hive-partitioned Parquet: year=2025/month=01/data.parquet -SELECT * FROM read_parquet('data/**/*.parquet', hive_partitioning=true) -WHERE year = 2025 AND month = 1; --- Only reads files from year=2025/month=1/ directory -``` - -**Partition pruning:** DuckDB reads directory names as virtual columns and skips directories that don't match the filter. This avoids reading irrelevant files entirely. - -### Writing Partitioned Output - -```sql -COPY (SELECT * FROM transformed_data) -TO 'output/' (FORMAT PARQUET, PARTITION_BY (year, month), COMPRESSION ZSTD); -``` - ---- - -## Persistent vs. In-Memory Databases - -| Aspect | In-Memory | Persistent (.duckdb file) | -|---|---|---| -| Speed | Fastest (no disk I/O for storage) | Slightly slower (WAL writes) | -| Data survival | Lost on close | Survives restarts | -| Use case | Ad-hoc analysis, ETL pipelines | Repeated queries, shared datasets | -| Memory pressure | Entire DB in memory | Can leverage disk for overflow | - -**Recommendation:** Use in-memory for one-shot analytics and ETL. Use persistent when you query the same data repeatedly or need ACID guarantees. - ---- - -## COPY vs. 
INSERT for Bulk Loading - -```sql --- FAST: bulk load from file (bypasses transaction overhead per row) -COPY my_table FROM 'data.csv' (AUTO_DETECT true); - --- FAST: bulk load from query -CREATE TABLE target AS SELECT * FROM read_parquet('data.parquet'); - --- SLOW: row-by-row inserts (avoid for large datasets) -INSERT INTO my_table VALUES (1, 'a'), (2, 'b'), ...; -``` - -**Guidance:** - -- `COPY` and `CREATE TABLE AS` are optimized for bulk operations -- `INSERT INTO ... SELECT` is efficient for table-to-table transfers -- `executemany()` in Python is slower than DataFrame-based loading -- For Python bulk loading, prefer: `con.sql("INSERT INTO tbl SELECT * FROM df")` - ---- - -## Indexing - -DuckDB uses **ART (Adaptive Radix Tree) indexes** automatically for primary keys and unique constraints. - -```sql --- ART index created automatically -CREATE TABLE users ( - id INTEGER PRIMARY KEY, - name VARCHAR -); - --- Point lookups benefit from ART index -SELECT * FROM users WHERE id = 42; -``` - -**Key points:** - -- DuckDB does **not** support manually created secondary indexes (no `CREATE INDEX` for arbitrary columns) -- For analytical workloads, column-level min/max statistics and zonemap filtering replace traditional indexes -- Primary key and unique constraints create ART indexes automatically -- For range scans on large tables, pre-sorted data with Parquet row group pruning is more effective than indexing - ---- - -## Official Documentation - -- Performance guide: -- Configuration: -- Parquet: -- Storage: diff --git a/plugins/flow/skills/duckdb/references/python_client.md b/plugins/flow/skills/duckdb/references/python_client.md deleted file mode 100644 index f348d08..0000000 --- a/plugins/flow/skills/duckdb/references/python_client.md +++ /dev/null @@ -1,161 +0,0 @@ -# DuckDB Python Client - -## Connection Management - -```python -import duckdb - -# In-memory (default) -con = duckdb.connect() - -# Persistent database -con = duckdb.connect('my_database.duckdb') - 
-# Read-only mode -con = duckdb.connect('my_database.duckdb', read_only=True) - -# Module-level default connection (convenience) -duckdb.sql("SELECT 42") # uses a shared in-memory connection -``` - ---- - -## Querying and Fetching Results - -```python -# Fetch as list of tuples -result = con.sql("SELECT * FROM range(10)").fetchall() - -# Fetch one row -row = con.sql("SELECT 42 AS answer").fetchone() - -# Fetch as Pandas DataFrame -df = con.sql("SELECT * FROM my_table").fetchdf() -# or equivalently -df = con.sql("SELECT * FROM my_table").df() - -# Fetch as Polars DataFrame -pl_df = con.sql("SELECT * FROM my_table").pl() - -# Fetch as PyArrow Table -arrow_tbl = con.sql("SELECT * FROM my_table").arrow() - -# Fetch as NumPy arrays -np_result = con.sql("SELECT * FROM my_table").fetchnumpy() -``` - ---- - -## Parameter Binding - -```python -# Positional parameters -con.execute("SELECT * FROM users WHERE age > ? AND name = ?", [25, "Alice"]) - -# Named parameters -con.execute( - "SELECT * FROM users WHERE age > $min_age AND name = $name", - {"min_age": 25, "name": "Alice"} -) -``` - ---- - -## DataFrame Integration - -### Pandas - -```python -import pandas as pd - -# Query Pandas DataFrames directly (they appear as virtual tables) -df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}) -result = con.sql("SELECT * FROM df WHERE a > 1").fetchdf() - -# Insert DataFrame into a DuckDB table -con.sql("CREATE TABLE my_table AS SELECT * FROM df") - -# Export query result back to DataFrame -output_df = con.sql("SELECT a, COUNT(*) FROM my_table GROUP BY a").df() -``` - -### Polars - -```python -import polars as pl - -# Query Polars DataFrames directly -lf = pl.LazyFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -result = con.sql("SELECT * FROM lf WHERE x > 1").pl() - -# Polars can also read from DuckDB via its own SQL interface -df = pl.read_database("SELECT * FROM my_table", connection=con) -``` - -### PyArrow - -```python -import pyarrow as pa -import pyarrow.parquet as pq - -# 
Query Arrow tables directly -arrow_table = pa.table({"col1": [1, 2], "col2": ["a", "b"]}) -result = con.sql("SELECT * FROM arrow_table").arrow() - -# Efficient Parquet scanning via Arrow -con.sql("SELECT * FROM read_parquet('large_file.parquet')").arrow() -``` - ---- - -## Relational API - -```python -# Build queries programmatically -rel = con.sql("SELECT * FROM read_csv('data.csv')") -filtered = rel.filter("amount > 100") -projected = filtered.project("name, amount") -result = projected.order("amount DESC").limit(10).fetchdf() - -# Aggregation -con.sql("FROM data").aggregate("region, SUM(sales) AS total").fetchdf() -``` - ---- - -## Common Patterns - -### Bulk Insert - -```python -# From list of tuples -con.executemany("INSERT INTO my_table VALUES (?, ?)", [(1, "a"), (2, "b")]) - -# From DataFrame (more efficient) -con.sql("INSERT INTO my_table SELECT * FROM df") -``` - -### Context Manager - -```python -with duckdb.connect('my.duckdb') as con: - con.sql("CREATE TABLE t AS SELECT 1 AS x") - # auto-closed on exit -``` - -### Thread Safety - -```python -# Each thread should use its own connection, or use .cursor() -cursor = con.cursor() -cursor.execute("SELECT 42") -``` - ---- - -## Official Documentation - -- Python API overview: -- Relational API: -- Data ingestion: -- Conversion between types: diff --git a/plugins/flow/skills/duckdb/references/sql_patterns.md b/plugins/flow/skills/duckdb/references/sql_patterns.md deleted file mode 100644 index 3794e13..0000000 --- a/plugins/flow/skills/duckdb/references/sql_patterns.md +++ /dev/null @@ -1,286 +0,0 @@ -# Advanced DuckDB SQL Patterns - -## QUALIFY Clause - -Filter window function results directly, without wrapping in a subquery. 
- -```sql --- Keep only the top-ranked row per department -SELECT name, department, salary, - RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rnk -FROM employees -QUALIFY rnk = 1; - --- Deduplicate: keep latest record per customer -SELECT * -FROM events -QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY event_time DESC) = 1; -``` - ---- - -## COLUMNS(*) Expression - -Dynamically select columns by name or pattern. - -```sql --- Apply a function to all numeric columns matching a pattern -SELECT MIN(COLUMNS('revenue_.*')), MAX(COLUMNS('revenue_.*')) -FROM quarterly_report; - --- Apply expression to every column -SELECT COLUMNS(*) + 1 FROM numbers_table; - --- Combine with EXCLUDE -SELECT COLUMNS(* EXCLUDE (id, created_at)) FROM my_table; -``` - ---- - -## EXCLUDE / REPLACE / RENAME in SELECT - -```sql --- EXCLUDE: drop columns from SELECT * -SELECT * EXCLUDE (password_hash, internal_id) FROM users; - --- REPLACE: override a column's expression in SELECT * -SELECT * REPLACE (UPPER(name) AS name, salary * 1.1 AS salary) FROM employees; - --- RENAME: change column names in output -SELECT * RENAME (name AS employee_name, dept AS department) FROM staff; -``` - ---- - -## List Comprehensions - -```sql --- Transform list elements inline -SELECT [x * 2 FOR x IN [1, 2, 3, 4]]; --- [2, 4, 6, 8] - --- With filter condition -SELECT [x FOR x IN list_column IF x > 0] FROM my_table; - --- Nested: flatten and transform -SELECT [y + 1 FOR x IN nested_list FOR y IN x]; -``` - ---- - -## Struct and Map Operations - -```sql --- struct_pack: create a struct from values -SELECT struct_pack(name := 'Alice', age := 30); - --- struct_extract: pull a field out -SELECT struct_extract({'name': 'Alice', 'age': 30}, 'name'); --- or dot notation -SELECT s.name FROM (SELECT {'name': 'Alice', 'age': 30} AS s); - --- row() shorthand -SELECT row('Alice', 30); - --- map_from_entries: create map from key-value pairs -SELECT map_from_entries([('a', 1), ('b', 2)]); - --- Map 
element access -SELECT m['key1'] FROM (SELECT MAP {'key1': 10, 'key2': 20} AS m); - --- map_keys / map_values -SELECT map_keys(MAP {'a': 1, 'b': 2}); --- ['a', 'b'] -``` - ---- - -## PIVOT / UNPIVOT - -```sql --- PIVOT: aggregate and rotate rows into columns -PIVOT sales_data - ON product_name - USING SUM(amount) - GROUP BY region; - --- PIVOT with multiple aggregations -PIVOT sales_data - ON year - USING SUM(amount) AS total, COUNT(*) AS cnt - GROUP BY product; - --- UNPIVOT: rotate columns into rows -UNPIVOT monthly_metrics - ON jan, feb, mar, apr, may, jun - INTO NAME month VALUE revenue; - --- UNPIVOT with multiple value columns -UNPIVOT wide_table - ON COLUMNS('q[1-4]_sales'), COLUMNS('q[1-4]_costs') - INTO NAME quarter VALUE sales, costs; -``` - ---- - -## ASOF Joins - -Join time-series data by closest matching timestamp. - -```sql --- Match each trade to the most recent quote at or before trade time -SELECT t.*, q.bid, q.ask -FROM trades t -ASOF JOIN quotes q - ON t.symbol = q.symbol - AND t.trade_time >= q.quote_time; - --- ASOF LEFT JOIN (keep trades without matching quotes) -SELECT t.*, q.price -FROM events t -ASOF LEFT JOIN prices q - ON t.ticker = q.ticker - AND t.ts >= q.ts; -``` - ---- - -## UNION BY NAME - -Combine tables with different schemas by column name rather than position. 
- -```sql --- Tables may have different column sets; matching columns align, others fill with NULL -SELECT * FROM jan_report -UNION BY NAME -SELECT * FROM feb_report; - --- Also works with ALL (preserve duplicates) -SELECT * FROM dataset_a -UNION ALL BY NAME -SELECT * FROM dataset_b; -``` - ---- - -## Recursive CTEs - -```sql --- Generate a number series -WITH RECURSIVE seq AS ( - SELECT 1 AS n - UNION ALL - SELECT n + 1 FROM seq WHERE n < 100 -) -SELECT * FROM seq; - --- Traverse a hierarchy (org chart) -WITH RECURSIVE org AS ( - SELECT id, name, manager_id, 0 AS depth - FROM employees WHERE manager_id IS NULL - UNION ALL - SELECT e.id, e.name, e.manager_id, o.depth + 1 - FROM employees e JOIN org o ON e.manager_id = o.id -) -SELECT * FROM org ORDER BY depth, name; -``` - ---- - -## GROUP BY ALL / ORDER BY ALL - -```sql --- GROUP BY ALL: automatically groups by all non-aggregate columns -SELECT region, product, SUM(sales), AVG(price) -FROM orders -GROUP BY ALL; - --- ORDER BY ALL: orders by all selected columns left to right -SELECT department, name, hire_date -FROM employees -ORDER BY ALL; - --- Combine both -SELECT category, brand, COUNT(*) AS cnt, SUM(revenue) AS total -FROM sales -GROUP BY ALL -ORDER BY ALL; -``` - ---- - -## SAMPLE Clause - -```sql --- Random sample: fixed number of rows -SELECT * FROM large_table USING SAMPLE 1000; - --- Percentage-based sample -SELECT * FROM large_table USING SAMPLE 10%; - --- Repeatable sampling with a seed -SELECT * FROM large_table USING SAMPLE 5% (bernoulli, 42); - --- Reservoir sampling (fixed count, uniform) -SELECT * FROM large_table USING SAMPLE reservoir(500); -``` - ---- - -## String Slicing - -```sql --- Python-style string slicing with [start:end] -SELECT 'DuckDB'[1:4]; --- 'Duck' - -SELECT 'hello world'[7:]; --- 'world' - -SELECT 'hello world'[:5]; --- 'hello' - --- Works on list columns too -SELECT [10, 20, 30, 40, 50][2:4]; --- [20, 30, 40] -``` - ---- - -## Lambda Functions - -```sql --- list_transform: 
apply a function to each element -SELECT list_transform([1, 2, 3], x -> x * x); --- [1, 4, 9] - --- list_filter: keep elements matching a predicate -SELECT list_filter([1, 2, 3, 4, 5], x -> x % 2 = 0); --- [2, 4] - --- list_reduce: fold a list to a single value -SELECT list_reduce([1, 2, 3, 4], (acc, x) -> acc + x); --- 10 - --- Combine: filter then transform -SELECT list_transform( - list_filter(scores, s -> s >= 60), - s -> s / 100.0 -) AS passing_pcts -FROM students; - --- Lambda with list_sort using custom comparator -SELECT list_sort([3, 1, 2], (a, b) -> a - b); --- [1, 2, 3] -``` - ---- - -## Official Documentation - -- SQL features: -- Window functions: -- Lambda functions: -- Nested types: -- PIVOT/UNPIVOT: -- SAMPLE: diff --git a/plugins/flow/skills/flow-execution/SKILL.md b/plugins/flow/skills/flow-execution/SKILL.md index 15f44e2..d818460 100644 --- a/plugins/flow/skills/flow-execution/SKILL.md +++ b/plugins/flow/skills/flow-execution/SKILL.md @@ -7,6 +7,8 @@ description: "Use when implementing Flow tasks from Beads or spec.md, claiming r Use this lifecycle skill when implementation starts after a Flow plan or ready Beads task exists. +> **Beads mode:** Skip every `bd` invocation when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `../flow/references/discipline.md`. + ## Workflow 1. Select ready work from `bd ready` and claim it before editing. diff --git a/plugins/flow/skills/flow-planning/SKILL.md b/plugins/flow/skills/flow-planning/SKILL.md index e680c71..f022933 100644 --- a/plugins/flow/skills/flow-planning/SKILL.md +++ b/plugins/flow/skills/flow-planning/SKILL.md @@ -15,6 +15,18 @@ Use this lifecycle skill for PRDs, research, single-flow planning, refinement, r 4. Refine until tasks include concrete files, behavior, tests, commands, and acceptance criteria. 5. 
Create Beads epics/tasks and sync markdown views according to policy. +## Interrogate Before Finalizing (Grill) + +Before locking a PRD, spec, or refined worksheet, interrogate the plan until every decision branch is resolved. This is how Flow meets the Zero-Ambiguity Standard and the Stateless Executor Test — apply it in `flow-prd`, `flow-plan`, and especially `flow-refine`. + +- Ask **one question at a time** and wait for the answer before the next. Walk the decision tree top-down, resolving dependencies between decisions in order. +- For **every** question, give your **recommended answer** and the trade-off, so the user confirms rather than composes from scratch. +- If a question is answerable from the repo, **explore the codebase / `patterns.md` / `knowledge/` / `tech-stack.md` instead of asking**. Only ask product or trade-off questions a human must decide. +- Challenge the plan against the project's **domain language**: reuse the terminology already in `patterns.md` and `knowledge/` chapters, and flag/resolve term conflicts before finalizing. +- Record decisions that are **hard to reverse, surprising, and a real trade-off** into `knowledge/` (or `learnings.md`); skip low-value notes. +- Stop only when no open branch remains and a zero-context executor could implement from the artifact alone. +- The finished spec/worksheet doubles as a **handoff**: reference existing artifacts (PRD, patterns, knowledge, affected files) rather than duplicating them, and name the next skill to invoke. (If the host exposes `grill-me` / `grill-with-docs` / `handoff` skills, you may invoke them to drive these steps; otherwise apply the discipline directly.) + ## Guardrails - Planning must be decision-complete; do not defer obvious research to implementation. 
diff --git a/plugins/flow/skills/flow-setup/SKILL.md b/plugins/flow/skills/flow-setup/SKILL.md index c01036c..09072bc 100644 --- a/plugins/flow/skills/flow-setup/SKILL.md +++ b/plugins/flow/skills/flow-setup/SKILL.md @@ -7,6 +7,8 @@ description: "Use when initializing Flow in a repo, configuring .agents, install Use this lifecycle skill for project initialization, installation checks, setup validation, and first context files. +> **Beads mode:** Skip every `bd` invocation when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `../flow/references/discipline.md`. + ## Workflow 1. Detect project root, existing `.agents/` state, Beads availability, and repo-native commands. diff --git a/plugins/flow/skills/flow-sync-status/SKILL.md b/plugins/flow/skills/flow-sync-status/SKILL.md index 2e2dbff..d895349 100644 --- a/plugins/flow/skills/flow-sync-status/SKILL.md +++ b/plugins/flow/skills/flow-sync-status/SKILL.md @@ -7,6 +7,8 @@ description: "Use when syncing Beads state to markdown, checking Flow status, re Use this lifecycle skill for status dashboards, sync, context refresh, cleanup checks, and drift reporting. +> **Beads mode:** Skip every `bd` invocation when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. With no backend, `/flow:sync` is a no-op (announce and exit) and status falls back to `spec.md` markers. Never halt for missing Beads. See `../flow/references/discipline.md`. + ## Workflow 1. Read `.agents/beads.json` before any sync/export decision. 
@@ -17,8 +19,10 @@ Use this lifecycle skill for status dashboards, sync, context refresh, cleanup c ## Guardrails +- **`/flow:sync` ALWAYS writes the reconciled markdown to disk.** It is mandatory: regenerate **every markdown file in `.agents/specs/<flow_id>/`** (`spec.md`, `learnings.md`, and any other tracked markdown in the flow folder) — not just `spec.md` — so they all match Beads exactly, and persist them. Sync is never read-only/dry-run and must never finish without writing the markdown. The sole exception is the no-backend Beads mode described above, where `/flow:sync` is a no-op. +- **"Sync"/"export" means reconciling markdown ↔ Beads to identical reality on disk — NOT Dolt.** NEVER run `bd dolt` commands (`bd dolt push`/`pull`/`export`) as part of sync. They are out of scope and only run if the user explicitly and separately asks for Dolt. - Sync reads backend state; do not close, block, or mutate tasks during status reporting. -- Do not run export, auto-stage, or Dolt push unless policy or the user explicitly allows it. +- Do not auto-stage/commit unless policy or the user explicitly allows it. - Preserve human-written spec content; only update synchronized task/status regions. - Ask before applying context-doc updates when sync detects drift. diff --git a/plugins/flow/skills/flow/SKILL.md b/plugins/flow/skills/flow/SKILL.md index 2a12987..a6d9e3f 100644 --- a/plugins/flow/skills/flow/SKILL.md +++ b/plugins/flow/skills/flow/SKILL.md @@ -7,6 +7,10 @@ description: "Use when a repository has .agents, when the user asks for Flow lif Flow coordinates Context-Driven Development in `.agents/` repositories. Keep this skill small: use it to identify the active lifecycle phase, enforce the Beads-first invariants, and load the matching lifecycle skill. +> **Flow is a skill, not a CLI.** There is no `flow` executable. Never run `flow`, `flow sync`, `flow prd`, etc. as shell commands. Invoke this skill (or the matching lifecycle skill), or use the `/flow:*` slash commands where the host supports them. 
+> +> **Beads mode:** Skip every `bd` invocation when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `references/discipline.md`. + ## Workflow 1. Check hook-provided Flow context first; otherwise detect `.agents/`, Beads (`bd`), git branch, and repo-native commands. diff --git a/plugins/flow/skills/flow/references/archive.md b/plugins/flow/skills/flow/references/archive.md index 085d7ed..5810445 100644 --- a/plugins/flow/skills/flow/references/archive.md +++ b/plugins/flow/skills/flow/references/archive.md @@ -1,6 +1,8 @@ # Flow Archive +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Archive completed flow and elevate patterns to project level. ## Usage diff --git a/plugins/flow/skills/flow/references/finish.md b/plugins/flow/skills/flow/references/finish.md index c60607b..2ab1b7e 100644 --- a/plugins/flow/skills/flow/references/finish.md +++ b/plugins/flow/skills/flow/references/finish.md @@ -1,6 +1,8 @@ # Flow Finish +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Complete a flow's development work by verifying, reviewing, and integrating. 
## Usage diff --git a/plugins/flow/skills/flow/references/prd.md b/plugins/flow/skills/flow/references/prd.md index ee43f93..774ac48 100644 --- a/plugins/flow/skills/flow/references/prd.md +++ b/plugins/flow/skills/flow/references/prd.md @@ -1,6 +1,8 @@ # Flow PRD +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + ## 1.0 SYSTEM DIRECTIVE You are "The Orchestrator", an AI architect for the Flow framework. Your primary mission is to enforce the **Zero-Ambiguity Mandate**: you MUST complete all necessary analysis and research to create a concrete, High-Definition Roadmap (`prd.md`) that groups multiple granular Flows (Chapters). diff --git a/plugins/flow/skills/flow/references/refresh.md b/plugins/flow/skills/flow/references/refresh.md index 0b5a2c9..3dbb8ae 100644 --- a/plugins/flow/skills/flow/references/refresh.md +++ b/plugins/flow/skills/flow/references/refresh.md @@ -1,5 +1,7 @@ # /flow:refresh — Sync Context with Codebase +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + ## Purpose Refresh the flow's context files by re-scanning the codebase and updating `.agents/` metadata to reflect the current state. Use this when returning to a project after external changes (other contributors, CI, dependency updates) or when context files feel stale. 
diff --git a/plugins/flow/skills/flow/references/revert.md b/plugins/flow/skills/flow/references/revert.md index 0854dcc..49808b6 100644 --- a/plugins/flow/skills/flow/references/revert.md +++ b/plugins/flow/skills/flow/references/revert.md @@ -1,6 +1,8 @@ # Flow Revert +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Git-aware revert of flows, phases, or tasks. ## Usage diff --git a/plugins/flow/skills/flow/references/review.md b/plugins/flow/skills/flow/references/review.md index b14fef2..5648471 100644 --- a/plugins/flow/skills/flow/references/review.md +++ b/plugins/flow/skills/flow/references/review.md @@ -1,6 +1,8 @@ # Flow Review +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Dispatch a code review for a flow's implementation with Beads-aware git range detection. ## Usage diff --git a/plugins/flow/skills/flow/references/revise.md b/plugins/flow/skills/flow/references/revise.md index 6b96804..5a634f9 100644 --- a/plugins/flow/skills/flow/references/revise.md +++ b/plugins/flow/skills/flow/references/revise.md @@ -1,6 +1,8 @@ # Flow Revise +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Update spec or plan when implementation reveals issues. 
## Usage diff --git a/plugins/flow/skills/flow/references/status.md b/plugins/flow/skills/flow/references/status.md index 90f370f..f4a06c6 100644 --- a/plugins/flow/skills/flow/references/status.md +++ b/plugins/flow/skills/flow/references/status.md @@ -1,6 +1,8 @@ # Flow Status +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Display progress overview for all active flows. ## Phase 1: Load Registry diff --git a/plugins/flow/skills/flow/references/task.md b/plugins/flow/skills/flow/references/task.md index 8c340ae..1204f10 100644 --- a/plugins/flow/skills/flow/references/task.md +++ b/plugins/flow/skills/flow/references/task.md @@ -1,6 +1,8 @@ # Flow Task +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Create ephemeral exploration flow (no audit trail). ## Usage diff --git a/plugins/flow/skills/flow/references/validate.md b/plugins/flow/skills/flow/references/validate.md index 4998471..293d0d6 100644 --- a/plugins/flow/skills/flow/references/validate.md +++ b/plugins/flow/skills/flow/references/validate.md @@ -1,6 +1,8 @@ # Flow Validate +> **Beads mode:** Skip every `bd` invocation below when the SessionStart hook reports `Beads Backend: Missing (None)` or `Disabled via plugin config (useBeads=false)`. Treat `spec.md` markers as fallback source of truth and skip `/flow:sync`. Never halt for missing Beads. See `discipline.md`. + Validate project integrity and fix issues. 
## Usage diff --git a/plugins/flow/skills/gcp/SKILL.md b/plugins/flow/skills/gcp/SKILL.md deleted file mode 100644 index a14db7a..0000000 --- a/plugins/flow/skills/gcp/SKILL.md +++ /dev/null @@ -1,390 +0,0 @@ ---- -name: gcp -description: "Use when managing Google Cloud resources, editing .gcloudignore or app.yaml, scripting gcloud commands, configuring IAM, service accounts, Cloud Storage, Pub/Sub, BigQuery, Vertex AI, or GCP services." ---- - -# Google Cloud Platform (GCP) Skill - -## Service Overview - -Use the dedicated `terraform` skill when the question is primarily about Terraform layout, state boundaries, brownfield import/export, workspaces, or CI-driven plan/apply workflows. Keep this `gcp` skill focused on Google Cloud services and `gcloud`-centric operations. - -### Core Services - -- **Compute**: - - **Cloud Run**: Serverless containers (default choice for stateless apps). - - **GKE**: Managed Kubernetes for complex orchestrations. - - **Compute Engine**: Raw VMs for specific OS/kernel needs. -- **Data & Storage**: - - **Cloud Storage (GCS)**: Object storage. - - **Cloud SQL**: Managed PostgreSQL/MySQL/SQL Server. - - **BigQuery**: Serverless data warehouse (analytics). - - **Firestore**: NoSQL document database. -- **AI/ML**: - - **Vertex AI**: Unified platform for models (Gemini, PaLM), training, and deployment. - - - -## `gcloud` CLI & Scripting - -### Configuration & Auth - - - -Avoid interactive prompts in scripts. - - - - - -```bash -# Production/CI: Use Service Account Key or Workload Identity -gcloud auth activate-service-account --key-file=key.json - -# Local Dev: User Login -gcloud auth login -gcloud config set project MY_PROJECT_ID -``` - - - -### Scripting Best Practices - -#### 1. Structured Output - -Never parse default text output. Use `--format` (json/yaml) and `--filter`. 
- - - -```bash -# Bad -gcloud compute instances list | grep RUNNING - -# Good (Parseable JSON) -gcloud compute instances list --format="json" - -# Good (Filter + Specific Value) -gcloud run services list \ - --filter="status.conditions.status=True AND metadata.name:my-service" \ - --format="value(status.url)" -``` - - - -#### 2. Deterministic Filters - -Flatten complex resources to find what you need. - - - -```bash -# Find latest revision of a service -gcloud run revisions list \ - --service=my-service \ - --sort-by="~metadata.creationTimestamp" \ - --limit=1 \ - --format="value(metadata.name)" -``` - - - -#### 3. Quiet Mode - -Suppress "updates available" warnings and prompts. - - - -```bash -export CLOUDSDK_CORE_DISABLE_PROMPTS=1 -gcloud ... --quiet -``` - - - -## Automation Patterns - -### 1. Cloud Run Deployment - -Standard pattern for deploying containers. - - - -```bash -gcloud run deploy my-service \ - --image gcr.io/my-project/my-image:tag \ - --platform managed \ - --region us-central1 \ - --allow-unauthenticated \ - --set-env-vars="DEBUG=true,DB_HOST=10.0.0.2" -``` - - - -### 2. Secret Management - - - -Access secrets securely (requires Secret Manager API). - - - - - -```bash -# Mount as volume in Cloud Run (Preferred) -gcloud run deploy ... --set-secrets="/secrets/db=my-db-secret:latest" - -# Access via CLI (for ops scripts) -gcloud secrets versions access latest --secret="my-secret" -``` - - - - - -## AlloyDB - -AlloyDB is a fully managed PostgreSQL-compatible database with columnar engine and ML-assisted auto-vacuum. - -### Cluster / Instance Model - -- **Cluster**: regional resource containing one primary and optional read pool instances. -- **Primary instance**: read-write; choose machine type and vCPUs. -- **Read pool**: horizontally scalable read-only replicas within the same cluster. 
- -```bash -# Create a cluster -gcloud alloydb clusters create my-cluster \ - --region=us-central1 \ - --password=SECRET \ - --network=projects/MY_PROJECT/global/networks/default - -# Create primary instance -gcloud alloydb instances create my-primary \ - --cluster=my-cluster \ - --region=us-central1 \ - --instance-type=PRIMARY \ - --cpu-count=4 -``` - -### PSA Networking Requirement - -AlloyDB requires **Private Service Access (PSA)** — a peered VPC range allocated for Google-managed services. Client VMs must be in the same VPC (or a connected VPC) to reach the instance IP. - -```bash -# Allocate PSA range (one-time per VPC) -gcloud compute addresses create google-managed-services-default \ - --global \ - --purpose=VPC_PEERING \ - --prefix-length=20 \ - --network=default - -# Create the peering -gcloud services vpc-peerings connect \ - --service=servicenetworking.googleapis.com \ - --ranges=google-managed-services-default \ - --network=default -``` - -### AlloyDB vs Cloud SQL - -| Aspect | AlloyDB | Cloud SQL | -|--------|---------|-----------| -| Engine | PostgreSQL-compatible only | PostgreSQL, MySQL, SQL Server | -| Performance | ~4× higher throughput (columnar engine, shared memory cache) | Standard managed RDBMS | -| HA | Auto-failover < 60 s, cross-zone | Regional replica, ~60 s failover | -| Pricing | Higher; compute + storage separate | Instance + storage (simpler) | -| Best for | High-throughput OLTP, mixed OLTP/OLAP | General-purpose managed SQL | - ---- - -## Secret Manager Patterns - -### Diff-Based Updates - -Avoid creating unnecessary secret versions. Compare the current value before adding a new version. - -```bash -# Read existing value -CURRENT=$(gcloud secrets versions access latest --secret="my-secret" 2>/dev/null || echo "") - -NEW_VALUE="new-password-here" - -if [ "$CURRENT" != "$NEW_VALUE" ]; then - echo -n "$NEW_VALUE" | gcloud secrets versions add my-secret --data-file=- - echo "Secret updated." 
-else - echo "Secret unchanged, skipping version creation." -fi -``` - -### Common Access Patterns - -```bash -# Access the latest version -gcloud secrets versions access latest --secret="my-secret" - -# Access a specific version -gcloud secrets versions access 3 --secret="my-secret" - -# List versions -gcloud secrets versions list my-secret - -# Create a new secret -echo -n "my-value" | gcloud secrets create my-secret \ - --data-file=- \ - --replication-policy=automatic -``` - ---- - -## IAM Workload Identity - -Workload Identity lets GKE or Cloud Run workloads impersonate a GCP service account without key files. - -### Annotation + Binding Chain - -```bash -# 1. Create a GCP Service Account (GSA) -gcloud iam service-accounts create my-app-sa \ - --display-name="My App SA" - -# 2. Grant required roles to the GSA -gcloud projects add-iam-policy-binding MY_PROJECT \ - --member="serviceAccount:my-app-sa@MY_PROJECT.iam.gserviceaccount.com" \ - --role="roles/secretmanager.secretAccessor" - -# 3. Allow the Kubernetes Service Account (KSA) to impersonate the GSA -gcloud iam service-accounts add-iam-policy-binding \ - my-app-sa@MY_PROJECT.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:MY_PROJECT.svc.id.goog[NAMESPACE/KSA_NAME]" - -# 4. Annotate the KSA -kubectl annotate serviceaccount KSA_NAME \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=my-app-sa@MY_PROJECT.iam.gserviceaccount.com -``` - -### Service Account Impersonation (CLI) - -```bash -# Impersonate a GSA from a user or another SA -gcloud storage ls \ - --impersonate-service-account=my-app-sa@MY_PROJECT.iam.gserviceaccount.com - -# Generate a short-lived token -gcloud auth print-access-token \ - --impersonate-service-account=my-app-sa@MY_PROJECT.iam.gserviceaccount.com -``` - ---- - -## VPC Networking - -### PSA Ranges - -See AlloyDB section above. PSA is also required for Cloud SQL private IP and Memorystore. 
- -### Cloud NAT / Router - -Cloud NAT allows VMs without external IPs to reach the internet. - -```bash -# Create a Cloud Router -gcloud compute routers create my-router \ - --region=us-central1 \ - --network=default - -# Attach Cloud NAT -gcloud compute routers nats create my-nat \ - --router=my-router \ - --region=us-central1 \ - --auto-allocate-nat-external-ips \ - --nat-all-subnet-ip-ranges -``` - -### Firewall Rules - -```bash -# IAP TCP tunneling (SSH/RDP via IAP) -gcloud compute firewall-rules create allow-iap-ssh \ - --network=default \ - --allow=tcp:22 \ - --source-ranges=35.235.240.0/20 \ - --description="Allow SSH via IAP" - -# GCP load balancer health checks -gcloud compute firewall-rules create allow-health-checks \ - --network=default \ - --allow=tcp \ - --source-ranges=130.211.0.0/22,35.191.0.0/16 \ - --description="Allow GCP health check probers" -``` - -| Purpose | CIDR | -|---------|------| -| IAP TCP forwarding | `35.235.240.0/20` | -| GCP health check probers | `130.211.0.0/22`, `35.191.0.0/16` | - ---- - -## Cloud Batch - -Cloud Batch is a fully managed service for batch and HPC workloads. It provisions, schedules, and autoscales VMs (including Spot/preemptible) without managing a cluster. 
- -**When to use Cloud Batch vs GKE:** - -| Aspect | Cloud Batch | GKE | -|--------|-------------|-----| -| Workload type | Batch jobs, array jobs, MPI | Long-running services, microservices | -| Cluster management | None (fully managed) | Cluster lifecycle managed by operator | -| Spot/preemptible | Built-in, first-class | Node pool configuration | -| GPU / HPC support | A100/H100, HPC VM families | Any accelerator, custom node pools | -| Scheduling | Queue-based, job arrays | Kubernetes scheduler | - -```bash -# Submit a simple batch job from JSON spec -gcloud batch jobs submit my-job \ - --location=us-central1 \ - --config=job.json -``` - ---- - -## References Index - -- **[IAM Guide](references/iam.md)** - Service accounts, role bindings, Workload Identity, and IAM best practices. - -## Cross-References - -- **Gemini CLI Extensions org**: — community-built Gemini CLI extensions for GCP services and tooling. - -## Documentation & References - -- **SDK Cheat Sheet**: `gcloud cheat-sheet` -- **Core Services List**: [Google Cloud Products](https://cloud.google.com/products) -- **CLI Reference**: [gcloud CLI docs](https://cloud.google.com/sdk/gcloud/reference) - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [GCP Scripting](https://github.com/cofin/flow/blob/main/templates/styleguides/cloud/gcp_scripting.md) -- [Bash](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/bash.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Validation - -Add validation instructions here. 
- diff --git a/plugins/flow/skills/gcp/agents/openai.yaml b/plugins/flow/skills/gcp/agents/openai.yaml deleted file mode 100644 index a7abd32..0000000 --- a/plugins/flow/skills/gcp/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Google Cloud Platform" - short_description: "gcloud, IAM, service accounts, storage, Pub/Sub, BigQuery, and GCP setup" diff --git a/plugins/flow/skills/gcp/references/iam.md b/plugins/flow/skills/gcp/references/iam.md deleted file mode 100644 index 28efc1a..0000000 --- a/plugins/flow/skills/gcp/references/iam.md +++ /dev/null @@ -1,268 +0,0 @@ -# GCP IAM Reference - -## Service Account Creation & Key Management - -### Creating Service Accounts - -```bash -# Create a service account -gcloud iam service-accounts create SA_NAME \ - --display-name="Human-readable name" \ - --description="Purpose of this account" \ - --project=PROJECT_ID - -# List existing service accounts -gcloud iam service-accounts list --project=PROJECT_ID --format="json" -``` - -### Key Management - -```bash -# Create a key (only when Workload Identity is not an option) -gcloud iam service-accounts keys create key.json \ - --iam-account=SA_NAME@PROJECT_ID.iam.gserviceaccount.com - -# List keys for an account -gcloud iam service-accounts keys list \ - --iam-account=SA_NAME@PROJECT_ID.iam.gserviceaccount.com \ - --format="table(name, validAfterTime, validBeforeTime)" - -# Delete a key -gcloud iam service-accounts keys delete KEY_ID \ - --iam-account=SA_NAME@PROJECT_ID.iam.gserviceaccount.com --quiet -``` - -## IAM Role Binding Patterns - -### Project-Level Bindings - -```bash -# Grant a role at the project level -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.objectViewer" - -# Remove a role -gcloud projects remove-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - 
--role="roles/storage.objectViewer" - -# View current policy -gcloud projects get-iam-policy PROJECT_ID --format="json" -``` - -### Resource-Level Bindings - -```bash -# Cloud Storage bucket -gcloud storage buckets add-iam-policy-binding gs://BUCKET_NAME \ - --member="serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.objectAdmin" - -# Cloud Run service -gcloud run services add-iam-policy-binding SERVICE_NAME \ - --region=REGION \ - --member="user:email@example.com" \ - --role="roles/run.invoker" - -# Pub/Sub topic -gcloud pubsub topics add-iam-policy-binding TOPIC_NAME \ - --member="serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/pubsub.publisher" -``` - -### Conditional Bindings - -```bash -# Grant access with a time-based condition -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="user:dev@example.com" \ - --role="roles/editor" \ - --condition="expression=request.time < timestamp('2026-06-01T00:00:00Z'),title=temp-access,description=Temporary editor access" -``` - -## Workload Identity - -### GKE Workload Identity - -Allows Kubernetes service accounts to act as IAM service accounts without keys. - -```bash -# 1. Enable Workload Identity on the cluster -gcloud container clusters update CLUSTER_NAME \ - --region=REGION \ - --workload-pool=PROJECT_ID.svc.id.goog - -# 2. Create a Kubernetes service account (via kubectl) -kubectl create serviceaccount KSA_NAME --namespace NAMESPACE - -# 3. Create an IAM service account -gcloud iam service-accounts create GSA_NAME --project=PROJECT_ID - -# 4. Grant the IAM SA the roles it needs -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:GSA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.objectViewer" - -# 5. 
Bind the KSA to the GSA -gcloud iam service-accounts add-iam-policy-binding \ - GSA_NAME@PROJECT_ID.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/KSA_NAME]" - -# 6. Annotate the KSA -kubectl annotate serviceaccount KSA_NAME \ - --namespace NAMESPACE \ - iam.gke.io/gcp-service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com -``` - -### Cloud Run Workload Identity - -Cloud Run uses the service account assigned at deploy time. - -```bash -# Deploy with a specific service account -gcloud run deploy SERVICE_NAME \ - --image=IMAGE_URL \ - --region=REGION \ - --service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com - -# Update an existing service's identity -gcloud run services update SERVICE_NAME \ - --region=REGION \ - --service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com -``` - -## Identity-Aware Proxy (IAP) - -### Overview - -Identity-Aware Proxy implements a zero-trust access model for GCP resources. Instead of relying on network perimeter security, IAP verifies user identity and context before granting access to applications. IAP sits in front of Cloud Run services, GKE ingresses, and Compute Engine instances, enforcing authentication and authorization at the infrastructure layer. 
- -- Concepts: -- Enabling for Cloud Run: - -### IAP for Cloud Run - -```bash -# Deploy with IAP enabled -gcloud run deploy SERVICE_NAME \ - --image=IMAGE_URL \ - --region=REGION \ - --no-allow-unauthenticated \ - --iap - -# Enable IAP API -gcloud services enable iap.googleapis.com --project=PROJECT_ID - -# Create IAP service identity -gcloud beta services identity create \ - --service=iap.googleapis.com \ - --project=PROJECT_ID - -# Grant IAP service agent run.invoker so IAP can forward requests -PROJECT_NUMBER=$(gcloud projects describe PROJECT_ID --format='value(projectNumber)') -IAP_SA="service-${PROJECT_NUMBER}@gcp-sa-iap.iam.gserviceaccount.com" - -gcloud run services add-iam-policy-binding SERVICE_NAME \ - --region=REGION \ - --member="serviceAccount:${IAP_SA}" \ - --role="roles/run.invoker" -``` - -### IAP for GKE - -```bash -# Enable IAP on a GKE backend service (via Ingress) -# IAP is configured through BackendConfig in GKE: -# 1. Create an OAuth client ID in the Cloud Console -# 2. Store client ID and secret in a Kubernetes secret -kubectl create secret generic iap-secret \ - --from-literal=client_id=CLIENT_ID \ - --from-literal=client_secret=CLIENT_SECRET - -# 3. 
Reference in BackendConfig: -# apiVersion: cloud.google.com/v1 -# kind: BackendConfig -# metadata: -# name: iap-config -# spec: -# iap: -# enabled: true -# oauthclientCredentials: -# secretName: iap-secret -``` - -### Service Account Permissions for IAP - -```bash -# IAP service agent needs run.invoker to forward requests to Cloud Run -gcloud run services add-iam-policy-binding SERVICE_NAME \ - --region=REGION \ - --member="serviceAccount:service-PROJECT_NUMBER@gcp-sa-iap.iam.gserviceaccount.com" \ - --role="roles/run.invoker" - -# Grant users/groups access through IAP -gcloud beta iap web add-iam-policy-binding \ - --project=PROJECT_ID \ - --resource-type=cloud-run \ - --service=SERVICE_NAME \ - --region=REGION \ - --member="group:team@example.com" \ - --role="roles/iap.httpsResourceAccessor" - -# Grant IAP TCP tunneling access (for bastion/SSH) -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="group:devs@example.com" \ - --role="roles/iap.tunnelResourceAccessor" -``` - -### Key IAP Roles - -| Role | Purpose | -|-----------------------------------|--------------------------------------------------| -| `roles/iap.httpsResourceAccessor` | Access web applications behind IAP | -| `roles/iap.tunnelResourceAccessor`| Access VMs via IAP TCP tunneling (SSH, RDP) | -| `roles/iap.admin` | Full IAP configuration and policy management | - -### JWT Verification Patterns - -IAP sends a signed JWT in the `X-Goog-IAP-JWT-Assertion` header. Backend services should validate this token to confirm the request came through IAP. - -```bash -# JWT claims include: sub, email, aud, iss, exp, iat -# Issuer: https://cloud.google.com/iap -# Algorithm: ES256 -# JWKS endpoint: https://www.gstatic.com/iap/verify/public_key-jwk -# Audience format for Cloud Run: /projects/{NUM}/locations/{REGION}/services/{SERVICE} -``` - -Validation checklist: - -1. Verify signature against Google's JWKS public keys. -2. Check `iss` equals `https://cloud.google.com/iap`. -3. 
Check `aud` matches your service's expected audience. -4. Check `exp` has not passed (allow clock skew of ~30 seconds). -5. Extract `email` claim for user identification. - -### OAuth Brand (Consent Screen) - -```bash -# List existing brands -gcloud alpha iap oauth-brands list --project=PROJECT_ID --format=json - -# Create internal brand for Workspace organizations -gcloud alpha iap oauth-brands create \ - --project=PROJECT_ID \ - --application_title="My App" \ - --support_email="admin@example.com" -``` - -## Best Practices - -1. **Principle of least privilege**: Grant only the minimum roles required. Prefer predefined roles over primitive roles (`roles/viewer`, `roles/editor`, `roles/owner`). -2. **Avoid user-managed keys**: Use Workload Identity (GKE, Cloud Run) or default service account credentials wherever possible. Keys are long-lived secrets that are easy to leak. -3. **Rotate keys when unavoidable**: If you must use keys, rotate them regularly and delete old keys immediately. -4. **Use custom roles sparingly**: Prefer predefined roles. Create custom roles only when no predefined role matches the exact permission set needed. -5. **Audit regularly**: Use `gcloud asset search-all-iam-policies` or Policy Analyzer to review who has access to what. -6. **Scope service accounts per service**: Create dedicated service accounts for each workload rather than sharing a single account across services. diff --git a/plugins/flow/skills/gke/SKILL.md b/plugins/flow/skills/gke/SKILL.md deleted file mode 100644 index dc4e562..0000000 --- a/plugins/flow/skills/gke/SKILL.md +++ /dev/null @@ -1,342 +0,0 @@ ---- -name: gke -description: "Use when working with GKE, kubectl, Kubernetes manifests, k8s directories, Helm charts, node pools, workload identity, cluster scaling, GPU nodes, database sidecars, or GKE troubleshooting." 
---- - -# Google Kubernetes Engine (GKE) - -GKE is Google Cloud's managed Kubernetes service, handling cluster management, upgrades, scaling, GPU workloads, and production database connectivity via Auth Proxy sidecars. - -## Quick Reference - -### GPU Pod Spec (Quick) - -```yaml -resources: - limits: - nvidia.com/gpu: "1" # GPU in limits ONLY — never in requests -``` - -Add toleration for tainted GPU nodes: - -```yaml -tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule -``` - -### Workload Identity Binding (2-command pattern) - -```bash -# 1. Annotate the KSA with the GCP SA email -kubectl annotate serviceaccount KSA_NAME \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com - -# 2. Bind GCP SA to allow KSA impersonation -gcloud iam service-accounts add-iam-policy-binding \ - GSA_NAME@PROJECT_ID.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/KSA_NAME]" -``` - -### AlloyDB Auth Proxy Sidecar (Quick) - -```yaml -- name: alloydb-auth-proxy - image: gcr.io/alloydb-connectors/alloydb-auth-proxy:latest - args: - - "projects/PROJECT_ID/locations/REGION/clusters/CLUSTER/instances/INSTANCE" - - "--port=5432" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - capabilities: - drop: [ALL] -``` - -See [alloydb-on-gke.md](references/alloydb-on-gke.md) for the full production pattern. - -### kubectl Essentials - -```bash -# Cluster access -gcloud container clusters get-credentials CLUSTER --region=REGION -kubectl config use-context CONTEXT_NAME - -# Core operations -kubectl get nodes -kubectl get pods -A -kubectl logs -f POD_NAME -n NAMESPACE -kubectl exec -it POD_NAME -n NAMESPACE -- /bin/sh -kubectl apply -f manifest.yaml -``` - -### Deployment Workflow - -1. **Cluster** -- Autopilot (recommended) or Standard mode, always regional for production. -2. 
**Workload Identity** -- bind KSA to GSA; never use node service accounts. -3. **Deploy** -- `kubectl apply` or Helm chart with per-component values (web, workers). -4. **Scale** -- HPA for pods, VPA for right-sizing, Cluster Autoscaler for nodes. -5. **Observe** -- `kubectl logs`, `kubectl describe`, `kubectl top`. - -### Helm Chart Pattern - -```text -chart/ - Chart.yaml - values.yaml - templates/ - _helpers.tpl - web-deployment.yaml - web-service.yaml - worker-deployment.yaml - migration-job.yaml -``` - -Structure `values.yaml` with separate sections per component (`web`, `workers`), each specifying `replicaCount`, `image`, `command`, `resources`, and `port`. - -## Database on GKE - -### AlloyDB on GKE - -Connect to AlloyDB via the Auth Proxy sidecar + Workload Identity. The proxy runs as a sidecar and listens on `localhost:5432`. Application connects to `postgresql://user:password@localhost:5432/dbname`. - -Key roles for GSA: `roles/alloydb.client`, `roles/secretmanager.secretAccessor`, `roles/storage.objectAdmin`, `roles/logging.logWriter`. - -See **[alloydb-on-gke.md](references/alloydb-on-gke.md)** for full deployment, HPA with queue-depth metrics, CronJob queue monitor, and Job patterns. - -### Cloud SQL on GKE - -Connect to Cloud SQL via the `cloud-sql-proxy` sidecar. Same Workload Identity pattern; GSA needs `roles/cloudsql.client`. - -See **[cloudsql-on-gke.md](references/cloudsql-on-gke.md)** for pod spec and connection string format. - ---- - -## GPU Workloads - -| GPU Type | Machine Series | Notes | -|---|---|---| -| NVIDIA T4 | N1 | Cost-effective inference | -| NVIDIA L4 | G2 | Efficient inference/fine-tuning | -| NVIDIA A100 (40/80GB) | A2 | Large-scale training, MIG support | -| NVIDIA H100 (80GB) | A3 | Highest throughput, MIG support | - -**Autopilot GPU**: automatic driver install, pay-per-pod billing, MIG enabled by default (v1.29.3+). Simpler operations. 
- -**Standard GPU**: manual driver install via DaemonSet or GPU Operator (`helm install gpu-operator nvidia/gpu-operator`). Full node control. - -```yaml -# Minimal GPU pod spec -spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - containers: - - name: trainer - image: nvcr.io/nvidia/pytorch:24.01-py3 - resources: - limits: - nvidia.com/gpu: "1" # GPU in limits only; limits == requests for GPU -``` - -See **[gpu.md](references/gpu.md)** for time-sharing, MIG, NAP, Spot GPU, and TPU patterns. - ---- - - - -## Workflow - -### Step 1: Cluster Setup - -Choose Autopilot (Google-managed nodes, pay-per-pod) or Standard (full node control). Use regional clusters for production HA. Enable Workload Identity at cluster creation. - -### Step 2: Workload Identity Configuration - -```bash -# Create GSA + grant permissions -gcloud iam service-accounts create GSA_NAME -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:GSA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.admin" - -# Create KSA + bind to GSA -kubectl create serviceaccount KSA_NAME --namespace NAMESPACE -gcloud iam service-accounts add-iam-policy-binding \ - GSA_NAME@PROJECT_ID.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/KSA_NAME]" - -# Annotate KSA -kubectl annotate serviceaccount KSA_NAME \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com -``` - -### Step 3: Deploy Application - -Apply manifests or install Helm chart. Set resource requests/limits on every container. Add PodDisruptionBudgets for availability during upgrades. - -### Step 4: Validate - -Run `kubectl get pods -n NAMESPACE` to confirm healthy rollout. Check logs and events for errors. - - - - - -## Guardrails - -- **Always use Workload Identity** -- never attach permissions via node service account. Bind KSA-to-GSA explicitly. 
-- **Set resource requests AND limits** on every container -- prevents noisy-neighbor issues and enables HPA/VPA. -- **Use PodDisruptionBudgets** -- ensures minimum availability during voluntary disruptions (node upgrades, cluster scaling). -- **Regional clusters for production** -- zonal clusters are single points of failure. -- **Autopilot preferred** unless you need full node control (for example, manually managed GPU drivers or custom machine types); Autopilot itself supports GPU workloads. -- **Never expose workloads without network policies** -- restrict ingress/egress at the namespace level. -- **GPU in limits only** -- never put `nvidia.com/gpu` in `requests`; limits implicitly equal requests for GPU resources. -- **Taint GPU nodes** -- use `nvidia.com/gpu=present:NoSchedule` to prevent non-GPU pods from landing on expensive GPU nodes. -- **Security context: nonroot** -- always set `runAsNonRoot: true`, `runAsUser: 65532`, `runAsGroup: 65532`, `fsGroup: 65532`, `allowPrivilegeEscalation: false`, `capabilities.drop: [ALL]`. -- **Use Spot for fault-tolerant GPU workloads** -- 60-90% discount vs on-demand; combine with checkpointing for training jobs. - - - - - -### Validation Checkpoint - -Before delivering GKE configurations, verify: - -- [ ] Workload Identity is configured (no node SA usage) -- [ ] Every container has resource requests and limits -- [ ] PodDisruptionBudgets are defined for production workloads -- [ ] Cluster is regional (not zonal) for production -- [ ] Health checks (readiness + liveness probes) are defined -- [ ] Namespace isolation and network policies are present -- [ ] GPU resources are in `limits` only (not `requests`) -- [ ] GPU node pools have `nvidia.com/gpu=present:NoSchedule` taint -- [ ] Security context sets `runAsNonRoot: true`, `runAsUser: 65532`, `capabilities.drop: [ALL]` -- [ ] Database connections use Auth Proxy sidecar (not direct IP with credentials) - - - - - -## Example - -**Task:** Deploy a web application with a Service on GKE. 
- -```yaml -# deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: web-app - namespace: production -spec: - replicas: 3 - selector: - matchLabels: - app: web-app - template: - metadata: - labels: - app: web-app - spec: - serviceAccountName: web-app-ksa # Workload Identity KSA - containers: - - name: web - image: us-central1-docker.pkg.dev/my-project/repo/web-app:v1.2.0 - ports: - - containerPort: 8080 - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: "1" - memory: 1Gi - readinessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 5 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 15 - periodSeconds: 20 ---- -# service.yaml -apiVersion: v1 -kind: Service -metadata: - name: web-app - namespace: production -spec: - selector: - app: web-app - ports: - - port: 80 - targetPort: 8080 - type: ClusterIP ---- -# pdb.yaml -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: web-app-pdb - namespace: production -spec: - minAvailable: 2 - selector: - matchLabels: - app: web-app -``` - - - ---- - -> **No Gemini CLI extension exists for GKE** -- this skill provides unique value for GKE cluster management, GPU workloads, and production database connectivity patterns. - -## References Index - -For detailed guides and configuration examples, refer to the following documents in `references/`: - -- **[Cluster Management](references/cluster.md)** -- Autopilot vs Standard, Regional/Zonal setups, Private clusters. -- **[Node Pools](references/node_pools.md)** -- Creation, specialized pools (GPU, Spot), and management. -- **[Workload Identity](references/workload_identity.md)** -- Secure GCP API access configuration. -- **[Autoscaling](references/autoscaling.md)** -- HPA, VPA, and Cluster Autoscaler setups. -- **[Networking](references/networking.md)** -- Service types, GCE Ingress, and Network Policies. 
-- **[Security](references/security.md)** -- Hardening, Pod security contexts, and Secret Manager. -- **[Terraform Configuration](references/terraform.md)** -- Module examples for Autopilot and Standard. -- **[kubectl Commands](references/kubectl.md)** -- Essential access and operations commands. -- **[Troubleshooting](references/troubleshooting.md)** -- Debugging nodes, pods, and network issues. -- **[Helm Deployment](references/helm_deployment.md)** -- Helm chart patterns for web + worker deployments. -- **[SAQ Workers](references/saq_workers.md)** -- SAQ worker architecture, queue distribution, and graceful shutdown. -- **[GPU/TPU Workloads](references/gpu.md)** -- Node pool creation, time-sharing, MIG, NAP, Spot GPU, TPU. -- **[AlloyDB on GKE](references/alloydb-on-gke.md)** -- Auth Proxy sidecar, Workload Identity, HPA with queue-depth metrics. -- **[Cloud SQL on GKE](references/cloudsql-on-gke.md)** -- Cloud SQL Auth Proxy sidecar and connection patterns. -- **[Batch Workloads](references/batch-workloads.md)** -- Jobs, JobSet, ProvisioningRequest, Cloud Batch vs GKE. - ---- - -## Official References - -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [GCP Scripting](https://github.com/cofin/flow/blob/main/templates/styleguides/cloud/gcp_scripting.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/gke/agents/openai.yaml b/plugins/flow/skills/gke/agents/openai.yaml deleted file mode 100644 index f601910..0000000 --- a/plugins/flow/skills/gke/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "GKE" - short_description: "Google Kubernetes Engine clusters, workloads, Helm, kubectl, security, and scaling" diff --git a/plugins/flow/skills/gke/references/alloydb-on-gke.md b/plugins/flow/skills/gke/references/alloydb-on-gke.md deleted file mode 100644 index 057c723..0000000 --- a/plugins/flow/skills/gke/references/alloydb-on-gke.md +++ /dev/null @@ -1,591 +0,0 @@ -# AlloyDB on GKE - -Production patterns for connecting GKE workloads to AlloyDB using the Auth Proxy sidecar and Workload Identity. - -## Architecture - -The AlloyDB Auth Proxy runs as a **sidecar container** in the same Pod as the application. The proxy handles authentication (via Workload Identity) and encryption, and exposes AlloyDB on `localhost:5432`. The application connects as if to a local Postgres instance. 
- -```text -Pod -├── app-container → localhost:5432 → alloydb-auth-proxy → AlloyDB (private IP) -└── alloydb-auth-proxy (sidecar) -``` - -## Auth Proxy Sidecar Container Spec - -```yaml -- name: alloydb-auth-proxy - image: gcr.io/alloydb-connectors/alloydb-auth-proxy:latest - args: - - "projects/PROJECT_ID/locations/REGION/clusters/ALLOYDB_CLUSTER/instances/ALLOYDB_INSTANCE" - - "--port=5432" - - "--auto-iam-authn" # optional: use IAM-based authentication - - "--structured-logging" # JSON logs for Cloud Logging - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] -``` - -The instance URI format is: `projects/PROJECT_ID/locations/REGION/clusters/CLUSTER_NAME/instances/INSTANCE_NAME` - -Retrieve it with: - -```bash -gcloud alloydb instances describe INSTANCE_NAME \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --project=PROJECT_ID \ - --format="value(name)" -``` - -## Workload Identity Setup - -Full command sequence to create and configure the GSA, KSA, and binding: - -```bash -# 1. Create the GCP Service Account -gcloud iam service-accounts create worker-sa \ - --project=PROJECT_ID \ - --display-name="GKE Worker Service Account" - -# 2. Grant required roles to the GSA -ROLES=( - "roles/alloydb.client" - "roles/secretmanager.secretAccessor" - "roles/storage.objectAdmin" - "roles/logging.logWriter" - "roles/monitoring.metricWriter" - "roles/artifactregistry.reader" -) -for ROLE in "${ROLES[@]}"; do - gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:worker-sa@PROJECT_ID.iam.gserviceaccount.com" \ - --role="${ROLE}" -done - -# 3. Create the Kubernetes Service Account -kubectl create serviceaccount worker-ksa --namespace=NAMESPACE - -# 4. 
Annotate the KSA with the GCP SA email -kubectl annotate serviceaccount worker-ksa \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=worker-sa@PROJECT_ID.iam.gserviceaccount.com - -# 5. Allow KSA to impersonate the GCP SA (the binding) -gcloud iam service-accounts add-iam-policy-binding \ - worker-sa@PROJECT_ID.iam.gserviceaccount.com \ - --project=PROJECT_ID \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/worker-ksa]" -``` - -## Pod Security Context - -Always use this security context for AlloyDB proxy pods and application pods: - -```yaml -spec: - securityContext: - runAsNonRoot: true - runAsUser: 65532 # distroless nonroot user - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: app - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] -``` - -The UID 65532 matches the `nonroot` user in distroless images. The AlloyDB Auth Proxy image is distroless; running as root causes it to refuse to start. 
- -## Kubernetes Secrets from Secret Manager - -Sync Secret Manager secrets into a Kubernetes secret (KEY=VALUE format): - -```bash -# Fetch secret payload and create K8s secret -gcloud secrets versions access latest \ - --secret=MY_SECRET \ - --project=PROJECT_ID \ - > /tmp/app-secrets.env - -kubectl delete secret app-secrets -n NAMESPACE --ignore-not-found=true -kubectl create secret generic app-secrets \ - -n NAMESPACE \ - --from-env-file=/tmp/app-secrets.env - -rm -f /tmp/app-secrets.env -``` - -Reference in pod spec: - -```yaml -envFrom: - - secretRef: - name: app-secrets -``` - -Or mount individual keys: - -```yaml -env: - - name: DATABASE_URL - valueFrom: - secretKeyRef: - name: app-secrets - key: DATABASE_URL -``` - -## Full Worker Deployment Example - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: app-worker -spec: - replicas: 1 - selector: - matchLabels: - app: app-worker - template: - metadata: - labels: - app: app-worker - annotations: - cloud.google.com/compute-class: "Scale-Out" # Autopilot: prefer scale-out - spec: - serviceAccountName: worker-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: worker - image: us-central1-docker.pkg.dev/PROJECT_ID/repo/app:latest - command: ["app", "server", "run-worker"] - resources: - requests: - cpu: "4" - memory: "24Gi" - ephemeral-storage: "50Gi" - envFrom: - - secretRef: - name: app-secrets - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - - name: alloydb-auth-proxy - image: gcr.io/alloydb-connectors/alloydb-auth-proxy:latest - args: - - "projects/PROJECT_ID/locations/REGION/clusters/CLUSTER/instances/INSTANCE" - - "--port=5432" - - "--structured-logging" - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] - volumes: - - name: work - 
ephemeral: - volumeClaimTemplate: - spec: - accessModes: ["ReadWriteOnce"] - storageClassName: standard-rwo - resources: - requests: - storage: 50Gi -``` - -## Web Deployment Example - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: app-web -spec: - replicas: 2 - selector: - matchLabels: - app: app-web - template: - spec: - serviceAccountName: worker-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: web - image: us-central1-docker.pkg.dev/PROJECT_ID/repo/app:latest - command: ["app", "server", "run", "--host", "0.0.0.0", "--port", "8080"] - ports: - - containerPort: 8080 - resources: - requests: - cpu: "1" - memory: "1Gi" - envFrom: - - secretRef: - name: app-secrets - readinessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 10 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 30 - periodSeconds: 30 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - - name: alloydb-auth-proxy - image: gcr.io/alloydb-connectors/alloydb-auth-proxy:latest - args: - - "projects/PROJECT_ID/locations/REGION/clusters/CLUSTER/instances/INSTANCE" - - "--port=5432" - - "--structured-logging" - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] -``` - -## Job Patterns - -### Database Initialization Job - -Use `google/alloydbomni` image for psql access when the AlloyDB Auth Proxy sidecar is not needed (direct private IP access during setup): - -```yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: db-init -spec: - ttlSecondsAfterFinished: 300 # auto-delete after 5 minutes - backoffLimit: 0 # no retries (CREATE DATABASE is not idempotent) - template: - spec: - serviceAccountName: worker-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - 
runAsGroup: 65532 - fsGroup: 65532 - restartPolicy: Never - containers: - - name: create-db - image: google/alloydbomni:latest - command: - - bash - - -euc - - | - PG="host=${PGHOST} dbname=postgres user=postgres sslmode=require" - if psql "$PG" -tc "SELECT 1 FROM pg_database WHERE datname = 'mydb'" | grep -q 1; then - echo "Database already exists" - else - psql "$PG" -c "CREATE DATABASE mydb" - echo "Database created" - fi - env: - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: db-init-creds - key: PGPASSWORD - - name: PGHOST - valueFrom: - secretKeyRef: - name: db-init-creds - key: PGHOST - resources: - requests: - cpu: "0.2" - memory: "256Mi" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] -``` - -### Migration Job with Auth Proxy as initContainer (sidecar) - -In Kubernetes 1.29+, use `restartPolicy: Always` on an initContainer to run it as a sidecar (stays alive while the main container runs): - -```yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: db-migrate -spec: - ttlSecondsAfterFinished: 600 - backoffLimit: 3 - template: - spec: - serviceAccountName: worker-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - restartPolicy: Never - initContainers: - - name: alloydb-auth-proxy - image: gcr.io/alloydb-connectors/alloydb-auth-proxy:latest - restartPolicy: Always # sidecar: stays alive while main containers run - args: - - "projects/PROJECT_ID/locations/REGION/clusters/CLUSTER/instances/INSTANCE" - - "--port=5432" - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] - containers: - - name: migrate - image: us-central1-docker.pkg.dev/PROJECT_ID/repo/app:latest - command: ["app", "manage", "db", "upgrade", "--no-prompt"] - envFrom: - - secretRef: - name: app-secrets - resources: - requests: - cpu: "0.5" - memory: 
"512Mi" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] -``` - -## Queue-Depth HPA - -Scale worker deployments based on database queue depth published to Cloud Monitoring. - -### Step 1: Install Custom Metrics Stackdriver Adapter - -The adapter exposes Cloud Monitoring metrics to the HPA via the `external.metrics.k8s.io` API group. - -```bash -kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/production/adapter_new_resource_model.yaml -``` - -On GKE Autopilot, bind the adapter's KSA to a GCP SA with `roles/monitoring.viewer`: - -```bash -kubectl annotate serviceaccount custom-metrics-stackdriver-adapter \ - --namespace=custom-metrics \ - --overwrite \ - iam.gke.io/gcp-service-account=worker-sa@PROJECT_ID.iam.gserviceaccount.com - -gcloud iam service-accounts add-iam-policy-binding \ - worker-sa@PROJECT_ID.iam.gserviceaccount.com \ - --project=PROJECT_ID \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[custom-metrics/custom-metrics-stackdriver-adapter]" - -# Restart to pick up the new WI annotation -kubectl rollout restart deployment/custom-metrics-stackdriver-adapter -n custom-metrics -``` - -### Step 2: CronJob Queue Monitor - -The monitor uses two containers: `alloydbomni` (has psql, lacks curl) as an initContainer to query the DB, and `curlimages/curl` to push the metric to Cloud Monitoring. Communicate via a shared `emptyDir` volume. 
- -```yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: queue-monitor -spec: - schedule: "* * * * *" # every minute - concurrencyPolicy: Forbid # skip if previous run is still running - successfulJobsHistoryLimit: 3 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - ttlSecondsAfterFinished: 120 - activeDeadlineSeconds: 55 # must finish before next minute - template: - spec: - serviceAccountName: worker-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - restartPolicy: Never - volumes: - - name: shared - emptyDir: {} - initContainers: - - name: query - image: google/alloydbomni:latest - command: - - bash - - -euc - - | - COUNT=$(psql "${DATABASE_URL}" \ - -qtAc "SELECT COUNT(*) FROM job WHERE status IN ('pending','scheduled')" \ - 2>/dev/null || echo 0) - echo "${COUNT}" > /shared/count - envFrom: - - secretRef: - name: app-secrets - volumeMounts: - - name: shared - mountPath: /shared - resources: - requests: - cpu: "100m" - memory: "128Mi" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - containers: - - name: push - image: curlimages/curl:latest - command: - - sh - - -euc - - | - COUNT=$(cat /shared/count | tr -d '[:space:]') - TOKEN=$(curl -sf --max-time 5 \ - -H "Metadata-Flavor: Google" \ - "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \ - | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') - NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - curl -sf --max-time 10 -X POST \ - -H "Authorization: Bearer ${TOKEN}" \ - -H "Content-Type: application/json" \ - -d "{\"timeSeries\":[{\"metric\":{\"type\":\"custom.googleapis.com/app/queue_depth\"},\"resource\":{\"type\":\"global\",\"labels\":{\"project_id\":\"PROJECT_ID\"}},\"points\":[{\"interval\":{\"endTime\":\"${NOW}\"},\"value\":{\"int64Value\":\"${COUNT}\"}}]}]}" \ - "https://monitoring.googleapis.com/v3/projects/PROJECT_ID/timeSeries" - echo "Pushed queue depth: ${COUNT}" - volumeMounts: - - name: 
shared - mountPath: /shared - resources: - requests: - cpu: "100m" - memory: "64Mi" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] -``` - -### Step 3: Seed Initial Metric - -Without seeding, the HPA shows "unable to get external metric" until the CronJob pushes its first value. Seed immediately after deploying: - -```bash -TOKEN=$(gcloud auth print-access-token) -NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ") -curl -X POST \ - -H "Authorization: Bearer ${TOKEN}" \ - -H "Content-Type: application/json" \ - -d "{\"timeSeries\":[{\"metric\":{\"type\":\"custom.googleapis.com/app/queue_depth\"},\"resource\":{\"type\":\"global\",\"labels\":{\"project_id\":\"PROJECT_ID\"}},\"points\":[{\"interval\":{\"endTime\":\"${NOW}\"},\"value\":{\"int64Value\":\"0\"}}]}]}" \ - "https://monitoring.googleapis.com/v3/projects/PROJECT_ID/timeSeries" -``` - -### Step 4: HPA Manifest - -```yaml -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: app-worker-hpa -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: app-worker - minReplicas: 1 - maxReplicas: 10 - behavior: - scaleDown: - stabilizationWindowSeconds: 300 # wait 5 min before scaling down - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 70 - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: 80 - - type: External - external: - metric: - name: custom.googleapis.com|app|queue_depth # | not / in HPA - target: - type: AverageValue - averageValue: "15" # scale up when avg queue depth > 15 per replica -``` - -**Important:** Verify the external metric is readable before applying the HPA with the external metric section: - -```bash -kubectl get --raw "/apis/external.metrics.k8s.io/v1beta1/namespaces/NAMESPACE/custom.googleapis.com|app|queue_depth" -``` - -If the metric is not yet readable, apply the HPA with CPU/Memory only first, then add the external metric after 
the CronJob has pushed data. - -## Connection String Format - -```text -postgresql+asyncpg://DB_USER:DB_PASSWORD@localhost:5432/DB_NAME -``` - -The proxy always listens on `localhost` inside the pod. AlloyDB requires `sslmode=require` for direct connections (bypassing the proxy) but the proxy itself handles TLS -- app connects over plain TCP on localhost. - -## Official References - -- -- -- diff --git a/plugins/flow/skills/gke/references/autoscaling.md b/plugins/flow/skills/gke/references/autoscaling.md deleted file mode 100644 index 5013153..0000000 --- a/plugins/flow/skills/gke/references/autoscaling.md +++ /dev/null @@ -1,54 +0,0 @@ -# GKE Autoscaling - -## Horizontal Pod Autoscaler (HPA) - -Scales replicas based on CPU/Memory. - -```yaml -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: my-hpa -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: my-deployment - minReplicas: 2 - maxReplicas: 100 - metrics: - - type: Resource - resource: - name: cpu - target: { type: Utilization, averageUtilization: 70 } -``` - -## Vertical Pod Autoscaler (VPA) - -Adjusts CPU/Memory requests/limits for pods. - -```yaml -apiVersion: autoscaling.k8s.io/v1 -kind: VerticalPodAutoscaler -metadata: - name: my-vpa -spec: - targetRef: - apiVersion: apps/v1 - kind: Deployment - name: my-deployment - updatePolicy: - updateMode: "Auto" -``` - -## Cluster Autoscaler - -Scales node pools up/down based on pod demand. - -```bash -gcloud container clusters update CLUSTER \ - --enable-autoscaling \ - --min-nodes=1 \ - --max-nodes=100 \ - --node-pool=POOL -``` diff --git a/plugins/flow/skills/gke/references/batch-workloads.md b/plugins/flow/skills/gke/references/batch-workloads.md deleted file mode 100644 index e19bfd7..0000000 --- a/plugins/flow/skills/gke/references/batch-workloads.md +++ /dev/null @@ -1,224 +0,0 @@ -# Batch Workloads on GKE - -Patterns for running batch Jobs, distributed training, and large GPU batch workloads on GKE. 
- -## Kubernetes Job with GPU - -A basic GPU training job. Use `restartPolicy: Never` for training (each failure creates a new pod); use `restartPolicy: OnFailure` only for idempotent tasks. - -```yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: gpu-training-job -spec: - ttlSecondsAfterFinished: 3600 # auto-delete 1 hour after completion - backoffLimit: 2 # retry up to 2 times on failure - template: - spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-tesla-a100 - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - restartPolicy: Never - containers: - - name: trainer - image: nvcr.io/nvidia/pytorch:24.01-py3 - command: ["python", "train.py", "--checkpoint-dir=/checkpoints"] - resources: - limits: - nvidia.com/gpu: "1" # GPU in limits ONLY - volumeMounts: - - name: checkpoints - mountPath: /checkpoints - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - volumes: - - name: checkpoints - persistentVolumeClaim: - claimName: training-checkpoints-pvc -``` - -**Always use checkpointing for Spot GPU jobs.** Save checkpoints to a PVC or Cloud Storage (via GCS FUSE or `gsutil`) so training can resume after preemption. - -## JobSet for Distributed Training - -JobSet (v0.5+) is a Kubernetes API for coordinating multi-worker distributed training. Each `replicatedJob` represents a role (e.g., leader + workers). All jobs within a JobSet start and stop together. 
- -```bash -# Install JobSet controller -kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/v0.5.1/manifests.yaml -``` - -```yaml -apiVersion: jobset.x-k8s.io/v1alpha2 -kind: JobSet -metadata: - name: distributed-training -spec: - failurePolicy: - maxRestarts: 3 # restart the entire JobSet up to 3 times on failure - successPolicy: - operator: All # JobSet succeeds when all replicatedJobs complete - targetReplicatedJobs: - - workers - replicatedJobs: - - name: leader - replicas: 1 - template: - spec: - backoffLimit: 0 - template: - spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - restartPolicy: Never - containers: - - name: leader - image: nvcr.io/nvidia/pytorch:24.01-py3 - command: ["torchrun", "--nnodes=5", "--nproc_per_node=8", "--node_rank=0", - "--master_addr=$(JOBSET_NAME)-workers-0-0.$(JOBSET_NAME)", - "train.py"] - resources: - limits: - nvidia.com/gpu: "8" - env: - - name: JOBSET_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['jobset.x-k8s.io/jobset-name'] - - name: workers - replicas: 4 # 4 worker pods - template: - spec: - backoffLimit: 0 - template: - spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - restartPolicy: Never - containers: - - name: worker - image: nvcr.io/nvidia/pytorch:24.01-py3 - command: ["torchrun", "--nnodes=5", "--nproc_per_node=8", - "--node_rank=$(JOB_COMPLETION_INDEX)", - "--master_addr=$(JOBSET_NAME)-leader-0-0.$(JOBSET_NAME)", - "train.py"] - resources: - limits: - nvidia.com/gpu: "8" -``` - -JobSet creates headless Services for pod-to-pod communication. Pods address each other by DNS: `<jobset-name>-<replicated-job-name>-<job-index>-<pod-index>.<subdomain>`, where the subdomain defaults to the JobSet name. - -## ProvisioningRequest API (v1.28+) - -ProvisioningRequest enables queued provisioning for large GPU batches. Instead of pods waiting in Pending state while node capacity is provisioned, submit a single request that GKE fulfills atomically before scheduling any pods. 
- -**Best for:** Large A100/H100 training runs requiring 8+ GPU nodes that must all be available simultaneously. - -```yaml -apiVersion: autoscaling.x-k8s.io/v1beta1 -kind: ProvisioningRequest -metadata: - name: large-training-run -spec: - provisioningClassName: queued-provisioning.gke.io - parameters: - maxRunDurationSeconds: "86400" # 24-hour max run duration - podSets: - - count: 8 # provision 8 nodes - podTemplateRef: - name: gpu-pod-template ---- -apiVersion: v1 -kind: PodTemplate -metadata: - name: gpu-pod-template -template: - spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - nodeSelector: - cloud.google.com/gke-accelerator: nvidia-h100-80gb - containers: - - name: placeholder - image: nvcr.io/nvidia/pytorch:24.01-py3 - resources: - limits: - nvidia.com/gpu: "8" -``` - -Monitor the ProvisioningRequest: - -```bash -kubectl get provisioningrequest large-training-run -o yaml -kubectl describe provisioningrequest large-training-run -``` - -Once the request reaches `Provisioned` condition, submit the JobSet. GKE Cluster Autoscaler will place the pods on the pre-provisioned nodes. - -**Flex-start provisioning** (alternative for fault-tolerant jobs): Uses `flex-start.gke.io` class. Provisions nodes as they become available rather than waiting for all at once -- better for training jobs with checkpointing. - -```yaml -spec: - provisioningClassName: flex-start.gke.io -``` - -## Cloud Batch vs GKE Jobs - -Cloud Batch is a managed batch service that handles job scheduling, queueing, and VM lifecycle without Kubernetes. GKE Jobs run inside a Kubernetes cluster you manage (or Autopilot manages). 
- -### When to use Cloud Batch - -- Stateless batch jobs that don't need to communicate with other running services -- Simple GPU/CPU batch jobs without complex inter-pod communication -- No existing GKE cluster (avoid cluster overhead for pure batch) -- Need managed job queue with automatic retries, priority scheduling, and cost optimization -- Single-node GPU jobs without distributed training requirements - -### When to use GKE Jobs - -- Already running workloads in GKE (reuse existing cluster, Workload Identity, secrets) -- Distributed training requiring pod-to-pod networking (NCCL, Gloo) -- Need AlloyDB/Cloud SQL sidecar connectivity (Auth Proxy sidecar pattern) -- Complex orchestration: JobSets, initContainers, sidecars, custom volumes -- Jobs that interact with other GKE services (queuing, result storage, notifications) -- Need MIG or time-sharing GPU configurations - -### Decision Table - -| Factor | Cloud Batch | GKE Jobs | -|---|---|---| -| Existing GKE cluster | No | Yes | -| Distributed training (multi-node) | Limited | Yes (JobSet, MPI) | -| DB sidecar connectivity | No | Yes | -| Job queue management | Built-in | Custom (HPA + metrics) | -| Spot/preemptible support | Yes | Yes | -| Startup time | ~2 min (VM cold start) | Seconds (existing nodes) or minutes (new nodes) | -| Cost for infrequent jobs | Lower (no cluster) | Higher (cluster overhead) | -| GPU types | All | All | -| Cost visibility | Per-job billing | Cluster-level billing | - -## Official References - -- -- -- -- -- diff --git a/plugins/flow/skills/gke/references/cloudsql-on-gke.md b/plugins/flow/skills/gke/references/cloudsql-on-gke.md deleted file mode 100644 index da61fc5..0000000 --- a/plugins/flow/skills/gke/references/cloudsql-on-gke.md +++ /dev/null @@ -1,238 +0,0 @@ -# Cloud SQL on GKE - -Production patterns for connecting GKE workloads to Cloud SQL using the Auth Proxy sidecar and Workload Identity. 
- -## Architecture - -The Cloud SQL Auth Proxy runs as a **sidecar container** in the same Pod. The proxy handles authentication (via Workload Identity) and TLS, and exposes Cloud SQL on `localhost:5432` (Postgres) or `localhost:3306` (MySQL). - -```text -Pod -├── app-container → localhost:5432 → cloud-sql-proxy → Cloud SQL (via proxy API) -└── cloud-sql-proxy (sidecar) -``` - -## Auth Proxy Sidecar Container Spec - -```yaml -- name: cloud-sql-proxy - image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.11.0 - args: - - "--structured-logs" - - "--port=5432" - - "PROJECT_ID:REGION:INSTANCE_NAME" # instance connection name - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] -``` - -Retrieve the instance connection name: - -```bash -gcloud sql instances describe INSTANCE_NAME \ - --project=PROJECT_ID \ - --format="value(connectionName)" -# Output: PROJECT_ID:REGION:INSTANCE_NAME -``` - -## Workload Identity Setup - -Same pattern as AlloyDB, but the GSA needs `roles/cloudsql.client` instead of `roles/alloydb.client`: - -```bash -# 1. Create the GCP Service Account -gcloud iam service-accounts create app-sa \ - --project=PROJECT_ID \ - --display-name="GKE App Service Account" - -# 2. Grant Cloud SQL client role -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:app-sa@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/cloudsql.client" - -# 3. Grant other required roles -for ROLE in roles/secretmanager.secretAccessor roles/logging.logWriter; do - gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:app-sa@PROJECT_ID.iam.gserviceaccount.com" \ - --role="${ROLE}" -done - -# 4. Create the Kubernetes Service Account -kubectl create serviceaccount app-ksa --namespace=NAMESPACE - -# 5. 
Annotate the KSA with the GCP SA email -kubectl annotate serviceaccount app-ksa \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=app-sa@PROJECT_ID.iam.gserviceaccount.com - -# 6. Bind GCP SA to allow KSA impersonation -gcloud iam service-accounts add-iam-policy-binding \ - app-sa@PROJECT_ID.iam.gserviceaccount.com \ - --project=PROJECT_ID \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/app-ksa]" -``` - -## Pod Security Context - -Use the same nonroot security context as AlloyDB patterns: - -```yaml -spec: - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: app - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] -``` - -## Full Pod Spec Example - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: app -spec: - replicas: 2 - selector: - matchLabels: - app: app - template: - metadata: - labels: - app: app - spec: - serviceAccountName: app-ksa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: app - image: us-central1-docker.pkg.dev/PROJECT_ID/repo/app:latest - ports: - - containerPort: 8080 - resources: - requests: - cpu: "1" - memory: "1Gi" - env: - - name: DATABASE_URL - value: "postgresql+asyncpg://app_user:$(DB_PASSWORD)@localhost:5432/appdb" - envFrom: - - secretRef: - name: app-secrets - readinessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 10 - periodSeconds: 10 - livenessProbe: - httpGet: - path: /health - port: 8080 - initialDelaySeconds: 30 - periodSeconds: 30 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: [ALL] - - name: cloud-sql-proxy - image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.11.0 - args: - - "--structured-logs" - - "--port=5432" - - "PROJECT_ID:REGION:INSTANCE_NAME" - resources: - requests: - cpu: "0.5" - memory: "512Mi" - securityContext: 
- allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - capabilities: - drop: [ALL] -``` - -## Connection String Formats - -**PostgreSQL (via TCP socket on localhost):** - -```text -postgresql+asyncpg://DB_USER:DB_PASSWORD@localhost:5432/DB_NAME -postgresql://DB_USER:DB_PASSWORD@localhost:5432/DB_NAME -``` - -**MySQL:** - -```text -mysql+aiomysql://DB_USER:DB_PASSWORD@localhost:3306/DB_NAME -``` - -The proxy always listens on `localhost` inside the pod. The proxy handles TLS to Cloud SQL -- the application connects over plain TCP on localhost (no SSL required in the connection string). - -## IAM Database Authentication (Optional) - -Instead of password-based auth, use IAM to authenticate directly with the Cloud SQL instance. The GSA email becomes the database user. - -```bash -# Create IAM database user -gcloud sql users create app-sa@PROJECT_ID.iam \ - --instance=INSTANCE_NAME \ - --project=PROJECT_ID \ - --type=CLOUD_IAM_SERVICE_ACCOUNT -``` - -Enable IAM auth on the proxy: - -```yaml -args: - - "--structured-logs" - - "--auto-iam-authn" # enable IAM authentication - - "--port=5432" - - "PROJECT_ID:REGION:INSTANCE_NAME" -``` - -Connection string uses the SA email (without `.gserviceaccount.com`) as the username and an empty password: - -```text -postgresql://app-sa@PROJECT_ID.iam@localhost:5432/DB_NAME -``` - -## Cloud SQL vs AlloyDB Decision - -| Criteria | Cloud SQL | AlloyDB | -|---|---|---| -| PostgreSQL compatibility | Full | Full (Postgres 14+) | -| Performance | Standard | 4x read, 2x write vs Cloud SQL | -| High availability | Multi-zone replica | Managed HA, read pools | -| Price | Lower | Higher | -| Vector search | pgvector extension | Built-in `google_ml_integration` | -| AI integrations | pgvector | AlloyDB AI (Vertex AI embed) | -| Best for | General workloads, cost-sensitive | High-throughput, AI/ML workloads | - -## Official References - -- -- -- -- diff --git a/plugins/flow/skills/gke/references/cluster.md 
b/plugins/flow/skills/gke/references/cluster.md deleted file mode 100644 index f22a4fa..0000000 --- a/plugins/flow/skills/gke/references/cluster.md +++ /dev/null @@ -1,55 +0,0 @@ -# GKE Cluster Management - -## Autopilot vs Standard Mode - -### Autopilot (Recommended) - -- Google manages nodes, scaling, security, upgrades. -- Pay per pod resource usage. -- Security best practices enforced by default. - -```bash -gcloud container clusters create-auto CLUSTER_NAME \ - --region=REGION -``` - -### Standard Mode - -- Full control over node configuration. -- Manual node pool management. -- Pay for node resources. - -```bash -gcloud container clusters create CLUSTER_NAME \ - --region=REGION \ - --num-nodes=3 \ - --machine-type=e2-medium -``` - ---- - -## Cluster Creation Types - -### Regional Cluster (Production) - -Control plane replicated across zones for high availability. - -```bash -gcloud container clusters create CLUSTER \ - --region=us-central1 \ - --num-nodes=2 -``` - -### Private Cluster - -Disables public IP addresses for nodes. 
- -```bash -gcloud container clusters create CLUSTER \ - --region=us-central1 \ - --enable-private-nodes \ - --enable-private-endpoint \ - --master-ipv4-cidr=172.16.0.0/28 \ - --network=VPC_NAME \ - --subnetwork=SUBNET_NAME -``` diff --git a/plugins/flow/skills/gke/references/gpu.md b/plugins/flow/skills/gke/references/gpu.md deleted file mode 100644 index 3cbc792..0000000 --- a/plugins/flow/skills/gke/references/gpu.md +++ /dev/null @@ -1,248 +0,0 @@ -# GPU/TPU Workloads on GKE - -## GPU Node Pool Creation - -```bash -# Standard cluster: create a GPU node pool -gcloud container node-pools create gpu-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=n1-standard-4 \ - --accelerator=type=nvidia-tesla-t4,count=1 \ - --node-taints=nvidia.com/gpu=present:NoSchedule \ - --num-nodes=0 \ - --enable-autoscaling \ - --min-nodes=0 \ - --max-nodes=10 -``` - -The taint `nvidia.com/gpu=present:NoSchedule` prevents non-GPU pods from consuming GPU node resources. - -## GPU Types Reference - -| GPU | Machine Series | vCPU Range | Memory | Use Case | -|---|---|---|---|---| -| NVIDIA T4 | N1 | 4–96 | up to 624 GB | Cost-effective inference | -| NVIDIA L4 | G2 | 4–48 | up to 192 GB | Efficient inference/fine-tuning | -| NVIDIA A100 40GB | A2 | 12–96 | up to 1360 GB | Large-scale training, MIG | -| NVIDIA A100 80GB | A2 Ultra | 12–96 | up to 1360 GB | Large-scale training, MIG | -| NVIDIA H100 80GB | A3 | 26–208 | up to 1872 GB | Highest throughput, MIG | - -## Autopilot vs Standard for GPU - -| Feature | Autopilot | Standard | -|---|---|---| -| Driver install | Automatic | Manual (DaemonSet or GPU Operator) | -| Billing | Pay-per-pod (not per node) | Pay-per-node | -| MIG | Enabled by default (v1.29.3+) | Manual configuration | -| Node management | Google-managed | User-managed | -| GPU Operator | Not needed | `helm install gpu-operator nvidia/gpu-operator` | - -**Standard driver install (DaemonSet):** - -```bash -kubectl apply -f 
https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml -``` - -**Standard driver install (GPU Operator):** - -```bash -helm repo add nvidia https://helm.ngc.nvidia.com/nvidia -helm repo update -helm install gpu-operator nvidia/gpu-operator \ - --namespace gpu-operator \ - --create-namespace -``` - -## Pod Spec - -GPU resources must be in `limits` only. Kubernetes treats GPU limits as implicit requests -- do not set requests separately or the pod will fail to schedule. - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: gpu-job -spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: cloud.google.com/gke-accelerator - operator: In - values: - - nvidia-tesla-t4 # or nvidia-l4, nvidia-tesla-a100, etc. - containers: - - name: trainer - image: nvcr.io/nvidia/pytorch:24.01-py3 - command: ["python", "train.py"] - resources: - limits: - nvidia.com/gpu: "1" # GPU in limits ONLY - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 65532 - capabilities: - drop: [ALL] - restartPolicy: Never -``` - -## Time-Sharing (Software-Level GPU Sharing) - -Time-sharing allows multiple pods to share a single GPU via software-level multiplexing. Not true hardware isolation -- pods contend for GPU time. - -```bash -# Enable time-sharing on a node pool -gcloud container node-pools create shared-gpu-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=n1-standard-4 \ - --accelerator=type=nvidia-tesla-t4,count=1,gpu-sharing-strategy=time-sharing,max-shared-clients-per-gpu=4 \ - --node-taints=nvidia.com/gpu=present:NoSchedule -``` - -Pods request fractional GPU implicitly -- each requests `nvidia.com/gpu: "1"` but the node allows up to `max-shared-clients-per-gpu` concurrent pods sharing that one GPU. 
- -**When to use:** Multiple small inference services, development environments, batch jobs with low GPU utilization. - -**When to avoid:** Training jobs that need full GPU bandwidth, latency-sensitive inference. - -## MIG (Multi-Instance GPU) - -MIG provides hardware partitioning at the GPU level. Supported on A100 and H100 only. Each MIG slice is an independent, isolated GPU partition with dedicated memory and compute. - -- A100 40GB: up to 7 MIG slices (smallest: 1g.5gb = 1 GPU instance, 5 GB memory) -- A100 80GB: up to 7 MIG slices -- H100 80GB: up to 7 MIG slices - -```bash -# Enable MIG on a node pool (A100 example) -gcloud container node-pools create mig-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=a2-highgpu-1g \ - --accelerator=type=nvidia-tesla-a100,count=1 \ - --node-taints=nvidia.com/gpu=present:NoSchedule -``` - -**Configure MIG profile via ConfigMap** (GPU Operator manages the rest): - -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: default-mig-parted-config - namespace: gpu-operator -data: - config.yaml: | - version: v1 - mig-configs: - all-1g.5gb: - - devices: all - mig-enabled: true - mig-devices: - "1g.5gb": 7 -``` - -**Combining MIG + time-sharing:** Enable MIG first to partition the GPU hardware, then enable time-sharing on each MIG slice for additional software-level multiplexing. - -```bash ---accelerator=type=nvidia-tesla-a100,count=1,gpu-sharing-strategy=time-sharing,max-shared-clients-per-gpu=2 -``` - -## Node Auto-Provisioning (NAP) for GPU - -NAP automatically creates GPU node pools when pods request GPU resources that no existing node pool satisfies. - -```bash -# Enable NAP with GPU support -gcloud container clusters update CLUSTER_NAME \ - --region=REGION \ - --enable-autoprovisioning \ - --max-cpu=96 \ - --max-memory=624 \ - --autoprovisioning-resource-limits=nvidia.com/gpu=8 -``` - -NAP reads pod `limits` (including `nvidia.com/gpu`) and creates a matching node pool automatically. 
The taint `nvidia.com/gpu=present:NoSchedule` is automatically applied to GPU node pools created by NAP. - -## Spot GPU - -Spot VMs offer 60-90% discount over on-demand for fault-tolerant GPU workloads (training with checkpointing, batch inference). - -```bash -gcloud container node-pools create spot-gpu-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=n1-standard-4 \ - --accelerator=type=nvidia-tesla-t4,count=1 \ - --spot \ - --node-taints=nvidia.com/gpu=present:NoSchedule,cloud.google.com/gke-spot=true:NoSchedule \ - --enable-autoscaling \ - --min-nodes=0 \ - --max-nodes=20 -``` - -Pod toleration for Spot: - -```yaml -tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - - key: cloud.google.com/gke-spot - operator: Exists - effect: NoSchedule -``` - -**Checkpointing is mandatory for Spot training jobs** -- Spot VMs can be preempted with 30-second notice. Use a persistent volume or Cloud Storage to save checkpoints periodically. - -## TPU Basics - -TPUs (Tensor Processing Units) are Google's custom ML accelerators, optimized for large transformer models. - -```yaml -resources: - limits: - google.com/tpu: "4" # Request 4 TPU chips (v4 pod = 4 chips) -``` - -**TPU topology selector:** - -```yaml -nodeSelector: - cloud.google.com/gke-tpu-topology: 2x2x1 # v4 TPU topology - cloud.google.com/gke-tpu-accelerator: tpu-v4-podslice -``` - -**Autopilot TPU** (v1.29+): Autopilot supports TPU v4 and v5e. No driver management required. - -**Standard TPU**: Requires dedicated TPU node pool with `--tpu-topology` flag. 
- -```bash -gcloud container node-pools create tpu-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=ct4p-hightpu-4t \ - --tpu-topology=2x2x1 \ - --num-nodes=1 -``` - -**Framework references for TPU workloads:** - -- **vLLM**: High-throughput LLM serving, supports TPU v4/v5e -- **JetStream**: Google's high-performance inference framework for TPU -- **KubeRay**: Ray cluster operator for distributed training and inference (`helm install kuberay-operator kuberay/kuberay-operator`) - -## Official References - -- -- -- -- -- diff --git a/plugins/flow/skills/gke/references/helm_deployment.md b/plugins/flow/skills/gke/references/helm_deployment.md deleted file mode 100644 index 3ec6cc8..0000000 --- a/plugins/flow/skills/gke/references/helm_deployment.md +++ /dev/null @@ -1,256 +0,0 @@ -# Helm Deployment Patterns for Litestar Apps - -Helm chart patterns for deploying Python ASGI applications (Litestar) with web servers and background workers on GKE. - -## Chart Structure - -```text -chart/ - Chart.yaml - values.yaml - templates/ - _helpers.tpl - web-deployment.yaml - web-service.yaml - worker-deployment.yaml # One per worker type or parameterized - migration-job.yaml - pvc.yaml # If using persistent storage - configmap.yaml - secrets.yaml -``` - -## Values Pattern: Per-Component Configuration - -Structure values with separate sections for each component (web, workers, persistence): - -```yaml -web: - replicaCount: 2 - image: - repository: us-central1-docker.pkg.dev/PROJECT/repo/app - tag: latest - port: 8080 - command: ["app", "server", "run", "--host", "0.0.0.0", "--port", "8080"] - resources: - requests: - cpu: 500m - memory: 512Mi - limits: - cpu: "2" - memory: 2Gi - -workers: - - name: default-worker - replicaCount: 2 - concurrency: 4 - queues: ["default", "push", "ingress"] - command: ["app", "server", "run-worker", "--queues", "default,push,ingress", "--concurrency", "4"] - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: "1" - 
memory: 1Gi - - - name: scheduler-worker - replicaCount: 1 # Single replica only for scheduler queue - concurrency: 1 - queues: ["scheduler"] - command: ["app", "server", "run-worker", "--queues", "scheduler", "--concurrency", "1"] - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 512Mi - -persistence: - enabled: true - storageClass: standard-rwo - size: 10Gi - accessMode: ReadWriteOnce -``` - -## Web Deployment - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-web -spec: - replicas: {{ .Values.web.replicaCount }} - selector: - matchLabels: - app: {{ .Release.Name }} - component: web - template: - metadata: - labels: - app: {{ .Release.Name }} - component: web - spec: - serviceAccountName: {{ .Release.Name }}-sa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: web - image: "{{ .Values.web.image.repository }}:{{ .Values.web.image.tag }}" - command: {{ toJson .Values.web.command }} - ports: - - containerPort: {{ .Values.web.port }} - protocol: TCP - resources: - {{- toYaml .Values.web.resources | nindent 12 }} - startupProbe: - httpGet: - path: /health - port: {{ .Values.web.port }} - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 30 - readinessProbe: - httpGet: - path: /health - port: {{ .Values.web.port }} - periodSeconds: 10 - failureThreshold: 3 - livenessProbe: - tcpSocket: - port: {{ .Values.web.port }} - periodSeconds: 30 - failureThreshold: 5 - terminationGracePeriodSeconds: 30 -``` - -## Worker Deployment - -Template that iterates over worker definitions: - -```yaml -{{- range .Values.workers }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $.Release.Name }}-{{ .name }} -spec: - replicas: {{ .replicaCount }} - selector: - matchLabels: - app: {{ $.Release.Name }} - component: {{ .name }} - template: - metadata: - labels: - app: {{ $.Release.Name }} - component: {{ .name }} - spec: - 
serviceAccountName: {{ $.Release.Name }}-sa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: worker - image: "{{ $.Values.web.image.repository }}:{{ $.Values.web.image.tag }}" - command: {{ toJson .command }} - resources: - {{- toYaml .resources | nindent 12 }} - terminationGracePeriodSeconds: 60 -{{- end }} -``` - -## Health Probes - -Use different probe types for different checks: - -| Probe | Type | Target | Purpose | -|-------|------|--------|---------| -| **Startup** | HTTP GET | `/health` | Allow slow startup (e.g., DB migrations, model loading) | -| **Readiness** | HTTP GET | `/health` | Only receive traffic when healthy | -| **Liveness** | TCP Socket | Port | Restart if process is hung (avoids false positives from slow endpoints) | - -Startup probes with high `failureThreshold` (e.g., 30 x 5s = 150s) give applications time to initialize without being killed. - -## Database Migration Jobs - -Run migrations as Kubernetes Jobs before or alongside deployment: - -```yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .Release.Name }}-migrate-{{ .Release.Revision }} - annotations: - helm.sh/hook: pre-upgrade,pre-install - helm.sh/hook-weight: "-5" - helm.sh/hook-delete-policy: before-hook-creation -spec: - backoffLimit: 3 - template: - spec: - serviceAccountName: {{ .Release.Name }}-sa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - containers: - - name: migrate - image: "{{ .Values.web.image.repository }}:{{ .Values.web.image.tag }}" - command: ["app", "manage", "upgrade-database"] - envFrom: - - secretRef: - name: {{ .Release.Name }}-secrets - restartPolicy: Never -``` - -## Persistence with Pod Affinity - -When using `ReadWriteOnce` PVCs, pods must be scheduled on the same node: - -```yaml -spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ .Release.Name 
}} - topologyKey: kubernetes.io/hostname -``` - -## Security Context - -Always apply security contexts at both pod and container levels: - -```yaml -spec: - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - fsGroup: 65532 - containers: - - name: app - securityContext: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: - drop: ["ALL"] -``` - -## Official References - -- -- -- -- diff --git a/plugins/flow/skills/gke/references/kubectl.md b/plugins/flow/skills/gke/references/kubectl.md deleted file mode 100644 index a7112a7..0000000 --- a/plugins/flow/skills/gke/references/kubectl.md +++ /dev/null @@ -1,29 +0,0 @@ -# GKE kubectl Commands - -## Cluster Access - -```bash -# Get credentials -gcloud container clusters get-credentials CLUSTER --region=REGION - -# Switch context -kubectl config use-context CONTEXT_NAME -``` - -## Common Operations - -```bash -# Get nodes/pods -kubectl get nodes -kubectl get pods -A - -# Logs -kubectl logs POD_NAME -n NAMESPACE -kubectl logs -f POD_NAME -n NAMESPACE # follow - -# Exec -kubectl exec -it POD_NAME -n NAMESPACE -- /bin/sh - -# Apply -kubectl apply -f manifest.yaml -``` diff --git a/plugins/flow/skills/gke/references/networking.md b/plugins/flow/skills/gke/references/networking.md deleted file mode 100644 index e99a958..0000000 --- a/plugins/flow/skills/gke/references/networking.md +++ /dev/null @@ -1,68 +0,0 @@ -# GKE Networking - -## Services - -### ClusterIP (Internal) - -```yaml -apiVersion: v1 -kind: Service -metadata: - name: internal-service -spec: - type: ClusterIP - selector: - app: my-app - ports: [{ port: 80, targetPort: 8080 }] -``` - -### LoadBalancer (External) - -```yaml -apiVersion: v1 -kind: Service -metadata: - name: external-service - annotations: - cloud.google.com/neg: '{"ingress": true}' -spec: - type: LoadBalancer - selector: - app: my-app - ports: [{ port: 80, targetPort: 8080 }] -``` - -## Ingress (GCE) - -```yaml -apiVersion: networking.k8s.io/v1 
-kind: Ingress -metadata: - name: my-ingress - annotations: - kubernetes.io/ingress.class: "gce" - kubernetes.io/ingress.global-static-ip-name: "my-static-ip" -spec: - rules: - - host: api.example.com - http: - paths: - - path: /* - pathType: ImplementationSpecific - backend: - service: { name: my-service, port: { number: 80 } } -``` - -## Network Policy - -```yaml -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: allow-frontend-to-backend -spec: - podSelector: - matchLabels: { app: backend } - ingress: - - from: [{ podSelector: { matchLabels: { app: frontend } } }] -``` diff --git a/plugins/flow/skills/gke/references/node_pools.md b/plugins/flow/skills/gke/references/node_pools.md deleted file mode 100644 index 84f6fff..0000000 --- a/plugins/flow/skills/gke/references/node_pools.md +++ /dev/null @@ -1,47 +0,0 @@ -# GKE Node Pools - -## Create Node Pool - -```bash -gcloud container node-pools create POOL_NAME \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=e2-standard-4 \ - --num-nodes=3 \ - --enable-autoscaling \ - --min-nodes=1 \ - --max-nodes=10 -``` - -## Specialized Pools - -### GPU Node Pool - -```bash -gcloud container node-pools create gpu-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --machine-type=n1-standard-8 \ - --accelerator=type=nvidia-tesla-t4,count=1 \ - --node-taints=nvidia.com/gpu=present:NoSchedule -``` - -### Spot VM Pool (Cost Savings) - -```bash -gcloud container node-pools create spot-pool \ - --cluster=CLUSTER_NAME \ - --region=REGION \ - --spot \ - --machine-type=e2-standard-4 -``` - -## Management Commands - -```bash -# Resize -gcloud container clusters resize CLUSTER --node-pool=POOL --num-nodes=5 --region=REGION - -# Update Autoscaling -gcloud container node-pools update POOL --cluster=CLUSTER --region=REGION --enable-autoscaling --min-nodes=1 --max-nodes=10 -``` diff --git a/plugins/flow/skills/gke/references/saq_workers.md b/plugins/flow/skills/gke/references/saq_workers.md deleted 
file mode 100644 index 24865de..0000000 --- a/plugins/flow/skills/gke/references/saq_workers.md +++ /dev/null @@ -1,180 +0,0 @@ -# SAQ Worker Deployment on GKE - -Patterns for deploying SAQ (Simple Async Queue) workers as Kubernetes deployments alongside Litestar web applications. - -## Architecture Overview - -SAQ workers process background tasks from Redis-backed queues. Each worker deployment handles one or more named queues with configurable concurrency. The Litestar web application enqueues tasks; workers consume them. - -```text -┌──────────────┐ enqueue ┌───────┐ dequeue ┌─────────────────┐ -│ Litestar │ ──────────────> │ Redis │ <────────────── │ SAQ Workers │ -│ Web App │ │ │ │ (K8s Deploys) │ -└──────────────┘ └───────┘ └─────────────────┘ -``` - -## Queue Distribution Strategy - -Distribute queues across worker deployments based on workload characteristics: - -| Queue | Purpose | Concurrency | Replicas | Notes | -|-------|---------|-------------|----------|-------| -| `default` | General background tasks | 4 | 2+ | Catch-all queue | -| `push` | Push notifications, webhooks | 4 | 2+ | I/O bound, higher concurrency OK | -| `ingress` | Data ingestion/processing | 2 | 2+ | May be CPU-heavy | -| `mailers` | Email sending | 4 | 1-2 | Rate-limited by SMTP provider | -| `pull` | External data fetching | 2 | 1-2 | Network-bound | -| `scheduler` | Periodic/cron tasks | 1 | **1 only** | Must be single replica | - -## Worker Deployment Configuration - -### General Workers (Multi-Queue) - -Group related queues into a single deployment: - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: app-worker-default -spec: - replicas: 2 - selector: - matchLabels: - app: myapp - component: worker-default - template: - metadata: - labels: - app: myapp - component: worker-default - spec: - serviceAccountName: myapp-sa - securityContext: - runAsNonRoot: true - runAsUser: 65532 - runAsGroup: 65532 - containers: - - name: worker - image: 
us-central1-docker.pkg.dev/PROJECT/repo/app:latest - command: - - "app" - - "server" - - "run-worker" - - "--queues" - - "default,push,ingress" - - "--concurrency" - - "4" - resources: - requests: - cpu: 250m - memory: 256Mi - limits: - cpu: "1" - memory: 1Gi - envFrom: - - secretRef: - name: myapp-secrets - terminationGracePeriodSeconds: 60 -``` - -### Scheduler Worker (Single Replica) - -The scheduler queue runs periodic/cron tasks and must be constrained to a single replica to prevent duplicate execution: - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: app-worker-scheduler -spec: - replicas: 1 # CRITICAL: Never scale beyond 1 - strategy: - type: Recreate # Avoid overlapping pods during rollout - selector: - matchLabels: - app: myapp - component: worker-scheduler - template: - metadata: - labels: - app: myapp - component: worker-scheduler - spec: - containers: - - name: worker - image: us-central1-docker.pkg.dev/PROJECT/repo/app:latest - command: - - "app" - - "server" - - "run-worker" - - "--queues" - - "scheduler" - - "--concurrency" - - "1" - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 512Mi - terminationGracePeriodSeconds: 60 -``` - -## Worker Concurrency Configuration - -Concurrency controls how many tasks a single worker process handles simultaneously: - -- **CPU-bound tasks** (data processing, parsing): concurrency 1-2 -- **I/O-bound tasks** (API calls, email, notifications): concurrency 4-8 -- **Scheduler**: always concurrency 1 - -Set via the `--concurrency` CLI flag or environment variable: - -```bash -app server run-worker --queues default,push --concurrency 4 -``` - -## Graceful Shutdown - -Workers need time to finish in-progress tasks before termination: - -```yaml -spec: - terminationGracePeriodSeconds: 60 # Match or exceed max task duration -``` - -SAQ workers handle `SIGTERM` by: - -1. Stopping acceptance of new tasks -2. Waiting for in-progress tasks to complete -3. 
Exiting cleanly - -Set `terminationGracePeriodSeconds` to at least the maximum expected task duration. If tasks can run longer, implement checkpointing or use heartbeat-based stale detection to requeue interrupted tasks. - -## Stale Task Recovery - -Configure heartbeat and stale detection to handle worker crashes: - -| Setting | Default | Purpose | -|---------|---------|---------| -| `HEARTBEAT_INTERVAL` | 30s | How often running tasks send heartbeats | -| `STALE_AFTER_MINUTES` | 1.5min | Time without heartbeat before task is considered stale | -| `MAX_CONCURRENT_JOBS` | 4 | Maximum concurrent tasks per worker | - -The stale threshold must be at least 3x the heartbeat interval to avoid false positives. - -## In-Process vs Separate Workers - -SAQ workers can run in two modes: - -- **In-process** (`INPROCESS_WORKER=true`): Worker runs inside the Litestar web process. Suitable for development and small deployments. -- **Separate process** (`INPROCESS_WORKER=false`): Worker runs as a dedicated Kubernetes deployment. Required for production to isolate background task failures from web serving. 
- -## Official References - -- -- -- -- diff --git a/plugins/flow/skills/gke/references/security.md b/plugins/flow/skills/gke/references/security.md deleted file mode 100644 index 90c47e1..0000000 --- a/plugins/flow/skills/gke/references/security.md +++ /dev/null @@ -1,46 +0,0 @@ -# GKE Security - -## Cluster Hardening - -```bash -# Enable Shielded Nodes -gcloud container clusters create CLUSTER \ - --enable-shielded-nodes \ - --shielded-secure-boot - -# Enable Binary Authorization -gcloud container clusters update CLUSTER --enable-binauthz -``` - -## Pod Security Context - -```yaml -spec: - securityContext: - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: { type: RuntimeDefault } - containers: - - name: app - securityContext: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: { drop: [ALL] } -``` - -## Secret Manager Integration - -```bash -# Enable Secret Manager add-on -gcloud container clusters update CLUSTER --enable-secret-manager -``` - -```yaml -# Mount secrets via CSI -spec: - volumes: - - name: secrets - csi: - driver: secrets-store.csi.k8s.io - volumeAttributes: { secretProviderClass: "gcp-secrets" } -``` diff --git a/plugins/flow/skills/gke/references/terraform.md b/plugins/flow/skills/gke/references/terraform.md deleted file mode 100644 index d3fef1f..0000000 --- a/plugins/flow/skills/gke/references/terraform.md +++ /dev/null @@ -1,47 +0,0 @@ -# GKE Terraform Configuration - -## Autopilot Cluster - -```hcl -module "gke_autopilot" { - source = "terraform-google-modules/kubernetes-engine/google//modules/beta-autopilot-private-cluster" - version = "~> 31.0" - - project_id = var.project_id - name = "autopilot-cluster" - region = "us-central1" - network = google_compute_network.vpc.name - subnetwork = google_compute_subnetwork.subnet.name - ip_range_pods = "pods" - ip_range_services = "services" - - enable_private_endpoint = false - enable_private_nodes = true - master_ipv4_cidr_block = "172.16.0.0/28" -} -``` - -## 
Standard Cluster with Node Pools - -```hcl -module "gke" { - source = "terraform-google-modules/kubernetes-engine/google//modules/private-cluster" - version = "~> 31.0" - - # ... network config ... - - node_pools = [ - { - name = "default-pool" - machine_type = "e2-standard-4" - min_count = 1 - max_count = 10 - }, - { - name = "spot-pool" - machine_type = "e2-standard-4" - spot = true - } - ] -} -``` diff --git a/plugins/flow/skills/gke/references/troubleshooting.md b/plugins/flow/skills/gke/references/troubleshooting.md deleted file mode 100644 index 3eb059d..0000000 --- a/plugins/flow/skills/gke/references/troubleshooting.md +++ /dev/null @@ -1,35 +0,0 @@ -# GKE Troubleshooting - -## Node Issues - -```bash -# Check node status and conditions -kubectl get nodes -kubectl describe node NODE_NAME - -# Drain node for maintenance -kubectl drain NODE_NAME --ignore-daemonsets --delete-emptydir-data -``` - -## Pod Issues - -```bash -# Describe pod for events/errors -kubectl describe pod POD_NAME -n NAMESPACE - -# Check events -kubectl get events -n NAMESPACE --sort-by='.lastTimestamp' - -# Debug with ephemeral container -kubectl debug -it POD_NAME --image=busybox -n NAMESPACE -``` - -## Networking Issues - -```bash -# Test DNS resolution -kubectl run -it --rm debug --image=busybox -- nslookup kubernetes - -# Test service connectivity -kubectl run -it --rm debug --image=busybox -- wget -qO- http://service-name -``` diff --git a/plugins/flow/skills/gke/references/workload_identity.md b/plugins/flow/skills/gke/references/workload_identity.md deleted file mode 100644 index b53ac5e..0000000 --- a/plugins/flow/skills/gke/references/workload_identity.md +++ /dev/null @@ -1,112 +0,0 @@ -# GKE Workload Identity - -Workload Identity is the recommended way for GKE workloads to access Google Cloud APIs securely. - -## 1. 
Enable on Cluster - -```bash -# New cluster -gcloud container clusters create CLUSTER --workload-pool=PROJECT_ID.svc.id.goog - -# Existing cluster -gcloud container clusters update CLUSTER --workload-pool=PROJECT_ID.svc.id.goog -``` - -## 2. Configure Binding - -```bash -# Create Google Service Account (GSA) -gcloud iam service-accounts create GSA_NAME - -# Grant GSA permissions -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:GSA_NAME@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/storage.admin" - -# Create Kubernetes Service Account (KSA) -kubectl create serviceaccount KSA_NAME --namespace NAMESPACE - -# Bind KSA to GSA -gcloud iam service-accounts add-iam-policy-binding \ - GSA_NAME@PROJECT_ID.iam.gserviceaccount.com \ - --role="roles/iam.workloadIdentityUser" \ - --member="serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/KSA_NAME]" - -# Annotate KSA -kubectl annotate serviceaccount KSA_NAME \ - --namespace=NAMESPACE \ - iam.gke.io/gcp-service-account=GSA_NAME@PROJECT_ID.iam.gserviceaccount.com -``` - -## 3. Pod Configuration - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: my-app - namespace: my-namespace -spec: - serviceAccountName: my-ksa # KSA with Workload Identity -``` - -## 4. 
Cloud Run to GKE Communication - -When Cloud Run services need to communicate with GKE workloads (or vice versa), configure cross-service identity: - -### Cloud Run Service Account Setup - -```bash -# Create a dedicated GSA for the Cloud Run service -gcloud iam service-accounts create cloudrun-sa \ - --display-name="Cloud Run Service Account" - -# Deploy Cloud Run with the GSA -gcloud run deploy SERVICE \ - --service-account=cloudrun-sa@PROJECT_ID.iam.gserviceaccount.com -``` - -### Grant Cloud Run Access to GKE Resources - -```bash -# Allow Cloud Run's GSA to act as a GKE workload identity -gcloud iam service-accounts add-iam-policy-binding \ - GKE_GSA@PROJECT_ID.iam.gserviceaccount.com \ - --role="roles/iam.serviceAccountTokenCreator" \ - --member="serviceAccount:cloudrun-sa@PROJECT_ID.iam.gserviceaccount.com" - -# Or grant specific roles directly -gcloud projects add-iam-policy-binding PROJECT_ID \ - --member="serviceAccount:cloudrun-sa@PROJECT_ID.iam.gserviceaccount.com" \ - --role="roles/container.developer" -``` - -### Common IAM Roles for Cross-Service Access - -| Role | Purpose | -|------|---------| -| `roles/iam.workloadIdentityUser` | Allow KSA to impersonate GSA | -| `roles/iam.serviceAccountTokenCreator` | Allow one GSA to create tokens for another | -| `roles/container.developer` | Access GKE resources (pods, services) | -| `roles/run.invoker` | Allow GKE workloads to invoke Cloud Run services | - -## 5. 
Troubleshooting - -```bash -# Verify Workload Identity binding -gcloud iam service-accounts get-iam-policy GSA_NAME@PROJECT_ID.iam.gserviceaccount.com - -# Check KSA annotation -kubectl describe serviceaccount KSA_NAME -n NAMESPACE - -# Test from a pod -kubectl run test-wi --image=google/cloud-sdk:slim --rm -it \ - --serviceaccount=KSA_NAME --namespace=NAMESPACE \ - -- gcloud auth list -``` - -## Official References - -- -- -- diff --git a/plugins/flow/skills/granian/SKILL.md b/plugins/flow/skills/granian/SKILL.md deleted file mode 100644 index ca5ebc2..0000000 --- a/plugins/flow/skills/granian/SKILL.md +++ /dev/null @@ -1,241 +0,0 @@ ---- -name: granian -description: "Use when deploying ASGI, WSGI, or RSGI apps with Granian, editing granian CLI commands, worker or thread settings, SSL, HTTP/2, backpressure, or replacing uvicorn for production." ---- - -# Granian Server Skill - -Granian is a high-performance Rust-based ASGI/WSGI/RSGI server. Built on Rust's hyper and tokio for maximum performance, it is the preferred server for all production deployments over uvicorn. - -For Litestar integration, see `flow:litestar` → deployment section (`GranianPlugin` provides zero-config integration). 
- -## Quick Reference - -### CLI Usage - -```bash -# Basic ASGI (Litestar, Starlette, FastAPI) -granian app:main --interface asgi --host 0.0.0.0 --port 8000 - -# RSGI (Granian-native, highest performance) -granian app:main --interface rsgi --host 0.0.0.0 --port 8000 - -# WSGI (Flask, Django) -granian app:main --interface wsgi --host 0.0.0.0 --port 8000 -``` - -### Worker Configuration - -```bash -# Production: match workers to CPU cores -granian app:main --interface asgi \ - --workers 4 \ - --threads 2 \ - --threading-mode runtime - -# Development: single worker with reload -granian app:main --interface asgi --workers 1 --reload -``` - -### Interface Options - -| Interface | Use For | Notes | -|-----------|---------|-------| -| `asgi` | Litestar, Starlette, FastAPI | Standard ASGI spec | -| `rsgi` | Granian-native apps | Highest performance, Granian-specific | -| `wsgi` | Flask, Django | Sync frameworks | - -### Binding and Paths - -```bash -granian app:main \ - --host 0.0.0.0 \ - --port 8000 \ - --url-path-prefix /api -``` - -### SSL Configuration - -```bash -granian app:main --interface asgi \ - --host 0.0.0.0 \ - --port 8443 \ - --ssl-certfile /etc/ssl/certs/app.crt \ - --ssl-keyfile /etc/ssl/private/app.key -``` - -### HTTP Version - -```bash -# Support both HTTP/1.1 and HTTP/2 (recommended for production) -granian app:main --http auto - -# HTTP/2 only -granian app:main --http 2 - -# HTTP/1.1 only -granian app:main --http 1 -``` - -### Backpressure and Concurrency - -```bash -# Limit max concurrent connections to prevent overload -granian app:main --backpressure 1000 -``` - -### Logging - -```bash -# Structured JSON logging with access log -granian app:main \ - --log-level info \ - --access-log \ - --log-access-fmt json -``` - -### Granian vs Uvicorn Comparison - -| Feature | Granian | Uvicorn | -|---------|---------|---------| -| Core language | Rust (hyper + tokio) | Python | -| RSGI support | Yes (native) | No | -| HTTP/2 native | Yes | No (via h2 package) | 
-| Threading model | `workers` or `runtime` | GIL-bound workers | -| Performance | Higher throughput | Moderate | -| Memory footprint | Lower | Higher | -| Production default | Preferred | Acceptable fallback | - - - -## Workflow - -### Step 1: Install Granian - -```bash -pip install granian -``` - -### Step 2: Configure Interface Based on Framework - -Choose the interface flag matching the framework: - -- `--interface asgi` for Litestar, Starlette, FastAPI -- `--interface rsgi` for Granian-native apps (highest performance) -- `--interface wsgi` for Flask or Django - -### Step 3: Set Workers and Threads for Deployment Target - -Match `--workers` to available CPU cores. Use `--threading-mode runtime` for async workloads (ASGI/RSGI). Use `--threading-mode workers` for CPU-bound sync workloads. - -```bash -# Typical production formula -granian app:main \ - --interface asgi \ - --workers $(nproc) \ - --threads 2 \ - --threading-mode runtime -``` - -### Step 4: Add SSL for Production - -Always terminate SSL at granian or a reverse proxy. Prefer granian-native SSL for containerized deployments without an external proxy. - -```bash -granian app:main \ - --ssl-certfile /run/secrets/tls.crt \ - --ssl-keyfile /run/secrets/tls.key -``` - -### Step 5: Test Under Load - -Verify configuration with a load test before going live. Tune `--backpressure` to match expected peak concurrency without exhausting system resources. - - - - - -## Guardrails - -- **Use `--interface asgi` for ASGI frameworks** -- Litestar, Starlette, and FastAPI require `asgi`. Using `rsgi` with a pure ASGI app will fail at runtime. -- **Match `--workers` to CPU cores for production** -- under-provisioned workers waste hardware; over-provisioned workers increase memory pressure without throughput gains. -- **Use `--threading-mode runtime` for async workloads** -- runtime mode maps threads to the tokio runtime, giving better async scheduling than `workers` mode for I/O-heavy apps. 
-- **Prefer Granian over Uvicorn for all production deployments** -- Granian provides higher throughput, lower memory use, and native HTTP/2 support with no additional packages. -- **Set `--backpressure` to prevent overload under high traffic** -- without a limit, unbounded queuing leads to memory exhaustion and cascading timeouts. -- **Set `--http auto` to support both HTTP/1.1 and HTTP/2** -- most load balancers and clients expect HTTP/1.1 fallback even when HTTP/2 is preferred. -- **Never pin to `--http 2` alone in mixed-client environments** -- clients that do not support HTTP/2 will receive connection errors. - - - - - -### Validation Checkpoint - -Before delivering a Granian deployment configuration, verify: - -- [ ] `--interface` matches the framework (asgi/rsgi/wsgi) -- [ ] `--workers` is set to CPU core count (or a documented reason for deviation) -- [ ] `--threading-mode runtime` is used for async (ASGI/RSGI) workloads -- [ ] `--http auto` is set unless there is a specific reason to restrict HTTP version -- [ ] `--backpressure` is set for production deployments -- [ ] SSL flags are present for any publicly exposed production service -- [ ] Granian is used instead of uvicorn (or a reason is documented) - - - - - -## Example - -**Task:** Production deployment of a Litestar ASGI app on an 8-core host with SSL and structured logging. 
- -```bash -granian app:main \ - --interface asgi \ - --host 0.0.0.0 \ - --port 8443 \ - --workers 8 \ - --threads 2 \ - --threading-mode runtime \ - --http auto \ - --backpressure 2000 \ - --ssl-certfile /etc/ssl/certs/app.crt \ - --ssl-keyfile /etc/ssl/private/app.key \ - --log-level info \ - --access-log \ - --log-access-fmt json -``` - -For zero-config integration with Litestar, use `GranianPlugin`: - -```python -from litestar import Litestar -from litestar.plugins.granian import GranianPlugin - -app = Litestar( - route_handlers=[...], - plugins=[GranianPlugin()], -) -``` - -Then run via the Litestar CLI: - -```bash -litestar --app app:app run --host 0.0.0.0 --port 8000 -``` - - - ---- - -## Official References - -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/granian/agents/openai.yaml b/plugins/flow/skills/granian/agents/openai.yaml deleted file mode 100644 index 88867dd..0000000 --- a/plugins/flow/skills/granian/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Granian" - short_description: "Granian ASGI/WSGI/RSGI deployment, worker, thread, SSL, and HTTP settings" diff --git a/plugins/flow/skills/htmx/SKILL.md b/plugins/flow/skills/htmx/SKILL.md deleted file mode 100644 index f02d8d1..0000000 --- a/plugins/flow/skills/htmx/SKILL.md +++ /dev/null @@ -1,291 +0,0 @@ ---- -name: htmx -description: "Use when editing hx-* attributes, building HTMX hypermedia flows, returning partial HTML responses, setting HTMX response headers, or rendering server-side .html templates." 
---- - -# HTMX Skill - - - -## Quick Reference - -### Core Attributes - - - -```html - - - - - - - - - - -
Replace content
-
Replace element
-
Append
-
Prepend
-
Delete element
- - - -
Polling
- -``` - -
- -### OOB (Out of Band) Swaps - - - -```html - -
- Main content here -
-
- New notification! -
-
42
-``` - -
- -### Forms - - - -```html -
- - - - - - -
- - - -``` - -
- -### Indicators - - - -```html - - - -``` - - - -### Boosted Links - - - -```html - -
- Page 1 - Page 2 -
- - -Navigate -``` - -
- -### Events - - - -```html - -
- Waiting for event... -
- - - - - -``` - -
- -### Extensions - - - -```html - - - -
- - - - - -
- Live updates here -
- - -
-
- - -
-``` - -
- -### Headers & CSRF - - - -```html - - - - -``` - - - -### Confirm & Prompt - - - -```html - - - -``` - - - -## Server Response Headers - - - -```python -# Python example -response.headers["HX-Redirect"] = "/new-page" -response.headers["HX-Refresh"] = "true" -response.headers["HX-Trigger"] = "itemCreated" -response.headers["HX-Trigger-After-Swap"] = "formReset" -response.headers["HX-Reswap"] = "outerHTML" -response.headers["HX-Retarget"] = "#new-target" -``` - - - -## Best Practices - -- Return partial HTML, not full pages -- Use `hx-swap-oob` for updating multiple elements -- Add loading indicators for slow operations -- Use `hx-boost` for progressive enhancement -- Include CSRF tokens in headers -- Use semantic HTML for accessibility - -
- -## References Index - -- **[Litestar-Vite Integration](references/litestar_vite.md)** — Backend integration with Litestar-Vite plugin. - -## Deployment - -### Hypermedia Strategy - -HTMX applications are deployed bundled with their backend engine (e.g., Litestar). Deployment involves standard backend containerization or server hosting. - -### Static Assets - -Ensure `htmx.min.js` and desired 2.x extensions are bundle-copied to the backend static directory. - ---- - -## CI/CD Actions - -Example GitHub Actions workflow targeting Backend Tests ensuring partial content returns: - -```yaml -name: Backend CI -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 - - run: pip install -r requirements.txt - - run: pytest tests/ # Verify handlers return partial html correctly -``` - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [HTMX](https://github.com/cofin/flow/blob/main/templates/styleguides/frameworks/htmx.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. 
- diff --git a/plugins/flow/skills/htmx/agents/openai.yaml b/plugins/flow/skills/htmx/agents/openai.yaml deleted file mode 100644 index bba0dfe..0000000 --- a/plugins/flow/skills/htmx/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "HTMX" - short_description: "HTMX attributes, partial HTML responses, server headers, and templates" diff --git a/plugins/flow/skills/htmx/references/litestar_vite.md b/plugins/flow/skills/htmx/references/litestar_vite.md deleted file mode 100644 index 1b34bb4..0000000 --- a/plugins/flow/skills/htmx/references/litestar_vite.md +++ /dev/null @@ -1,66 +0,0 @@ -# Litestar-Vite Integration - -## Setup with VitePlugin - -```python -# Python backend -from litestar import Litestar -from litestar_vite import ViteConfig, VitePlugin - -vite_config = ViteConfig( - mode="htmx", # HTMX mode for partials - paths=PathConfig(resource_dir="src"), -) - -app = Litestar(plugins=[VitePlugin(config=vite_config)]) -``` - -## HTMX Helpers from litestar-vite-plugin - -```typescript -import { - addDirective, - registerHtmxExtension, - setHtmxDebug, - swapJson, -} from 'litestar-vite-plugin/helpers/htmx'; - -// Register custom extension -registerHtmxExtension('my-ext', { - onEvent: (name, evt) => { ... 
} -}); - -// Enable debug mode -setHtmxDebug(true); - -// Add custom directive -addDirective('confirm', (element, value) => { - element.setAttribute('hx-confirm', value); -}); - -// Swap JSON response into DOM -swapJson(targetEl, jsonData, 'innerHTML'); -``` - -## Server-Side HTMX Responses - -```python -from litestar import get -from litestar.response import Template - -@get("/partials/items") -async def get_items_partial() -> Template: - items = await fetch_items() - return Template( - "partials/items.html", - context={"items": items}, - ) -``` - -## CLI Commands - -```bash -litestar assets install # Install deps -litestar assets serve # Dev server with HMR -litestar assets build # Production build -``` diff --git a/plugins/flow/skills/inertia/SKILL.md b/plugins/flow/skills/inertia/SKILL.md deleted file mode 100644 index f98dd6d..0000000 --- a/plugins/flow/skills/inertia/SKILL.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -name: inertia -description: "Use when building Inertia.js apps, editing createInertiaApp, server-side routed SPAs, Inertia protocol responses, page components, shared props, or Litestar/Inertia integrations." ---- - -# Inertia.js Skill - -## Overview - -Inertia.js bridges server-side routing with client-side SPA rendering. This skill covers the Inertia protocol, React/Vue adapters, forms, shared data, partial reloads, lazy props, SSR, and the full Litestar-Vite integration including backend setup, response helpers, type generation, and v2 features. - ---- - - - -## References Index - -For detailed guides and configuration examples, refer to the following documents in `references/`: - -- **[Protocol & Client-Side](references/protocol.md)** - - Inertia protocol, React/Vue adapters, forms, shared data, partial reloads, lazy props, SSR, and best practices. 
-- **[Litestar Integration](references/litestar_integration.md)** - - Python backend setup, Inertia response helpers, Vite config, frontend setup, generated page props types, Inertia v2 features, and CLI commands. - - - ---- - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Inertia](https://github.com/cofin/flow/blob/main/templates/styleguides/frameworks/inertia.md) -- [TypeScript](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/typescript.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. - - - -## Example - -Add example instructions here. - diff --git a/plugins/flow/skills/inertia/agents/openai.yaml b/plugins/flow/skills/inertia/agents/openai.yaml deleted file mode 100644 index 560e8c8..0000000 --- a/plugins/flow/skills/inertia/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Inertia.js" - short_description: "Inertia pages, protocol responses, Litestar integration, and server-routed SPAs" diff --git a/plugins/flow/skills/inertia/references/litestar_integration.md b/plugins/flow/skills/inertia/references/litestar_integration.md deleted file mode 100644 index 49b7256..0000000 --- a/plugins/flow/skills/inertia/references/litestar_integration.md +++ /dev/null @@ -1,166 +0,0 @@ -# Litestar-Vite Integration (Comprehensive) - -## Python Backend Setup - -```python -from litestar import Litestar, get -from litestar_vite import ViteConfig, VitePlugin, PathConfig, TypeGenConfig -from litestar_vite.inertia import InertiaPlugin, InertiaConfig, InertiaResponse - -vite_config = ViteConfig( - mode="hybrid", # Inertia mode - 
paths=PathConfig(resource_dir="resources"), # Laravel-style - types=TypeGenConfig( - enabled=True, - generate_page_props=True, # Generate Inertia page props types - output="resources/generated", - ), -) - -inertia_config = InertiaConfig( - root_template="base.html", -) - -app = Litestar( - plugins=[ - VitePlugin(config=vite_config), - InertiaPlugin(config=inertia_config), - ], -) -``` - -## Inertia Response Helpers - -```python -from litestar_vite.inertia import ( - InertiaResponse, - share, # Share data across all responses - lazy, # Load prop only when requested - defer, # Load prop after initial render - merge, # Merge with existing data - flash, # Flash message - error, # Validation error - only, # Only include specific props - except_, # Exclude specific props - clear_history, # Clear browser history - scroll_props, # Control scroll behavior -) - -@get("/users") -async def users_page() -> InertiaResponse: - return InertiaResponse( - "Users/Index", - props={ - "users": await fetch_users(), - "stats": defer(lambda: fetch_stats()), # Loaded after render - }, - ) - -@get("/dashboard") -async def dashboard(request: Request) -> InertiaResponse: - share(request, "auth", {"user": request.user}) - return InertiaResponse("Dashboard", props={...}) -``` - -## Vite Config - -```typescript -// vite.config.ts -import { defineConfig } from 'vite'; -import react from '@vitejs/plugin-react'; // or vue, svelte -import { litestarVitePlugin } from 'litestar-vite-plugin'; - -export default defineConfig({ - plugins: [ - react(), - litestarVitePlugin({ - input: ['resources/app.tsx'], - ssr: 'resources/ssr.tsx', // Optional SSR entry - }), - ], -}); -``` - -## Frontend Setup (React) - -```tsx -// resources/app.tsx -import { createInertiaApp } from '@inertiajs/react'; -import { createRoot, hydrateRoot } from 'react-dom/client'; -import { - resolvePageComponent, - unwrapPageProps, -} from 'litestar-vite-plugin/inertia-helpers'; - -createInertiaApp({ - resolve: (name) => 
resolvePageComponent( - name, - import.meta.glob('./pages/**/*.tsx'), - ), - setup({ el, App, props }) { - // Unwrap props for cleaner access - const cleanProps = unwrapPageProps(props); - - if (el.hasChildNodes()) { - hydrateRoot(el, ); - } else { - createRoot(el).render(); - } - }, -}); -``` - -## Generated Page Props Types - -```typescript -// resources/generated/inertia-pages.d.ts (auto-generated) -declare module '@inertiajs/react' { - interface PageProps { - auth: { user: User | null }; - flash: { success?: string; error?: string }; - } -} - -// Type-safe page component -import { usePage } from '@inertiajs/react'; - -export default function Dashboard() { - const { auth, flash } = usePage().props; - // auth and flash are fully typed! -} -``` - -## Inertia v2 Features - -```python -# Precognition (form validation preview) -from litestar_vite.inertia import precognition - -@post("/users") -@precognition # Enable precognition for this route -async def create_user(data: CreateUserDTO) -> InertiaResponse: - user = await save_user(data) - return InertiaResponse.redirect("/users") - -# History encryption -inertia_config = InertiaConfig( - encrypt_history=True, # Encrypt browser history state -) - -# Clear history on sensitive pages -@get("/login") -async def login_page() -> InertiaResponse: - return InertiaResponse( - "Auth/Login", - clear_history=True, - ) -``` - -## CLI Commands - -```bash -litestar assets install # Install deps -litestar assets serve # Dev server -litestar assets build # Production build -litestar assets generate-types # Generate page props types -``` diff --git a/plugins/flow/skills/inertia/references/protocol.md b/plugins/flow/skills/inertia/references/protocol.md deleted file mode 100644 index ace1423..0000000 --- a/plugins/flow/skills/inertia/references/protocol.md +++ /dev/null @@ -1,193 +0,0 @@ -# Inertia Protocol & Client-Side Reference - -## Inertia Protocol - -Inertia bridges server-side routing with client-side rendering: - -1. 
**Initial Request**: Server returns full HTML with page data -2. **Subsequent Requests**: XHR with `X-Inertia` header, server returns JSON -3. **Page Component**: Client renders component with props from server - -## React Adapter - -```tsx -// app.tsx - Setup -import { createInertiaApp } from '@inertiajs/react'; -import { createRoot } from 'react-dom/client'; - -createInertiaApp({ - resolve: (name) => { - const pages = import.meta.glob('./pages/**/*.tsx', { eager: true }); - return pages[`./pages/${name}.tsx`]; - }, - setup({ el, App, props }) { - createRoot(el).render(); - }, -}); - -// pages/Users/Index.tsx - Page component -import { Head, Link, usePage } from '@inertiajs/react'; - -interface Props { - users: User[]; -} - -export default function UsersIndex({ users }: Props) { - return ( - <> - -

Users

- {users.map(user => ( - - {user.name} - - ))} - - ); -} -``` - -## Vue Adapter - -```vue - - - - - - - -``` - -## Forms - -```tsx -import { useForm } from '@inertiajs/react'; - -function CreateUser() { - const { data, setData, post, processing, errors } = useForm({ - name: '', - email: '', - }); - - const submit = (e: FormEvent) => { - e.preventDefault(); - post('/users'); - }; - - return ( -
- setData('name', e.target.value)} - /> - {errors.name && {errors.name}} - - - - ); -} -``` - -## Shared Data - -```tsx -// Access shared data from server -import { usePage } from '@inertiajs/react'; - -function Layout({ children }) { - const { auth, flash } = usePage().props; - - return ( -
- {flash.success && {flash.success}} - {auth.user ? {auth.user.name} : Login} - {children} -
- ); -} -``` - -## Partial Reloads - -```tsx -import { router } from '@inertiajs/react'; - -// Only reload specific props -router.reload({ only: ['users'] }); - -// Reload with preserved scroll -router.reload({ preserveScroll: true }); - -// Reload with preserved state -router.reload({ preserveState: true }); -``` - -## Lazy Loading Props - -```python -# Server-side (Python example) -def get_users(): - return InertiaResponse( - "Users/Index", - props={ - "users": lazy(lambda: fetch_users()), # Only loaded when needed - "stats": defer(lambda: fetch_stats()), # Loaded after initial render - } - ) -``` - -## SSR Setup - -```tsx -// ssr.tsx -import { createInertiaApp } from '@inertiajs/react'; -import ReactDOMServer from 'react-dom/server'; - -export function render(page) { - return createInertiaApp({ - page, - render: ReactDOMServer.renderToString, - resolve: (name) => require(`./pages/${name}`), - setup: ({ App, props }) => , - }); -} -``` - -## Best Practices - -- Use `preserveState` for filter/pagination changes -- Use `only` for partial reloads to reduce payload -- Use `lazy` for expensive props that aren't always needed -- Use `defer` for non-critical data that can load after -- Handle flash messages in layout component -- Use `Head` component for SEO diff --git a/plugins/flow/skills/integrating-agent-platforms/SKILL.md b/plugins/flow/skills/integrating-agent-platforms/SKILL.md deleted file mode 100644 index 14c3a79..0000000 --- a/plugins/flow/skills/integrating-agent-platforms/SKILL.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -name: integrating-agent-platforms -description: "Use when installing, updating, packaging, or troubleshooting Flow integrations across Claude Code, Gemini CLI, Codex CLI, OpenCode, Cursor, VS Code/Copilot, OpenClaw, or Google Antigravity." ---- - -# Integrating Agent Platforms - -## Overview - -Use official host-native install and update flows first. 
Keep the shared mental model consistent across hosts: install source, update path, cache behavior, local-vs-shared scope, and restart requirements. - - - -1. Prefer the platform's official marketplace, extension, or plugin system. -2. Use git-backed installs where the host officially supports them. -3. Reserve local links and wrapper files for development or hosts without a first-class git install story. -4. Explain what is copied, what is linked, what is cached, and when a restart is required. - -- **Claude Code:** Prefer marketplace install and marketplace update commands. -- **Gemini CLI:** Prefer `gemini extensions install` from GitHub and `gemini extensions update`. Use `link` only for local development. -- **Codex CLI:** Treat the plugin manifest and marketplace as the source of truth. Distinguish repo marketplace metadata from the installed cached copy. -- **OpenCode:** Follow local plugin directory and skills discovery rules. Do not imply undocumented git-url plugin installs are the default. -- **Google Antigravity:** Prefer workspace-local `.agents` customization when supported by the current build; keep global fallback guidance available. - - - - - -- Prefer user-scoped installs for personal tooling. -- Use project-scoped or workspace-scoped registration only when the team should inherit it. -- When local-only ignores are needed, prefer `.git/info/exclude` before `.gitignore`. -- Do not present undocumented install paths as if they were official. -- Distinguish source checkout, installed copy, and cache behavior when the host does. 
- - - - - -Before giving host-integration guidance, verify: - -- [ ] The install path is host-native when one exists -- [ ] Update/refresh commands are current for the target host -- [ ] Scope is clear: user, project, workspace, or local -- [ ] Cache/copy/link behavior is explained when it affects updates -- [ ] Restart requirements are called out when relevant - - - - - -Example framing: - -- "Gemini CLI should use the GitHub-backed extension install flow. Use `link` only for local development against a checkout." - - - -## Reference - -Read [references/host-matrix.md](references/host-matrix.md) when you need exact host-by-host guidance. diff --git a/plugins/flow/skills/integrating-agent-platforms/agents/openai.yaml b/plugins/flow/skills/integrating-agent-platforms/agents/openai.yaml deleted file mode 100644 index c0e1fdc..0000000 --- a/plugins/flow/skills/integrating-agent-platforms/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Agent Platform Integration" - short_description: "Flow packaging, installation, update, marketplace, and host integration behavior" diff --git a/plugins/flow/skills/integrating-agent-platforms/references/host-matrix.md b/plugins/flow/skills/integrating-agent-platforms/references/host-matrix.md deleted file mode 100644 index 918c2dc..0000000 --- a/plugins/flow/skills/integrating-agent-platforms/references/host-matrix.md +++ /dev/null @@ -1,48 +0,0 @@ -# Host Matrix - -## Claude Code - -- Preferred install path: marketplace. -- Preferred commands: - - `claude plugin marketplace add ` - - `claude plugin install @` - - `claude plugin marketplace update [name]` - - `claude plugin update @` -- Git-based marketplaces are supported and are the best fit for shared plugins. -- Updating marketplace metadata and updating an installed plugin are separate steps. - -## Gemini CLI - -- Preferred install path: native extension install from GitHub. 
-- Preferred commands: - - `gemini extensions install [--auto-update]` - - `gemini extensions update ` - - `gemini extensions link ` for local development only -- Gemini copies installed extensions into `~/.gemini/extensions`. -- Management operations take effect after the CLI session is restarted. -- `contextFileName` controls which extension-local context file is loaded. - -## Codex CLI - -- Plugin manifests live in `.codex-plugin/plugin.json`. -- Marketplaces live in `.agents/plugins/marketplace.json`. -- Keep the marketplace as the published catalog and treat the installed plugin as a cached copy. -- Codex can refresh plugin cache/version state independently of the source checkout, so docs should distinguish source checkout from installed state. - -## OpenCode - -- Preferred install path: local plugin files in `.opencode/plugins/` or `~/.config/opencode/plugins/`. -- `opencode.json` `plugin` entries are for npm packages, not the default local-development path. -- OpenCode merges config layers instead of replacing them. -- npm plugins are cached under `~/.cache/opencode/node_modules/`. -- Skills can be discovered from: - - `.opencode/skills/` - - `.claude/skills/` - - `.agents/skills/` - - their matching global directories - -## Google Antigravity - -- Prefer workspace-local `.agents` assets when the build supports them. -- Keep a global fallback for environments that still rely on home-directory skills. -- Separate workspace guidance from global installer guidance so local repos avoid unnecessary admin work. diff --git a/plugins/flow/skills/ipc/SKILL.md b/plugins/flow/skills/ipc/SKILL.md deleted file mode 100644 index 1546565..0000000 --- a/plugins/flow/skills/ipc/SKILL.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -name: ipc -description: "Use when implementing inter-process communication, shared memory regions, SPSC or MPMC ring buffers, zero-copy data transfer, platform synchronization primitives, or process notification mechanisms." 
---- - -# IPC (Inter-Process Communication) - -## Scope - -- Shared memory regions (POSIX `shm_open` + `mmap`, Windows `CreateFileMapping`). -- Lock-free ring buffers (SPSC, MPMC). -- Platform-specific synchronization (futex, ulock, Win32 Event). -- Notification mechanisms (eventfd, pipe, kqueue). -- Async ring integration with Tokio. -- Buffer pools and zero-copy data transfer. - - - -## Shared Memory Regions - -### ShmRegion Pattern - - - -```rust -pub struct ShmRegion { - ptr: *mut u8, - len: usize, - fd: OwnedFd, // RAII: closes on drop -} - -impl ShmRegion { - pub fn create(name: &str, size: usize) -> Result { - // SAFETY: shm_open + ftruncate + mmap is the standard POSIX pattern. - // We own the fd exclusively and unlink after mapping. - unsafe { - let fd = shm_open(name, O_CREAT | O_RDWR, 0o600)?; - ftruncate(fd, size as libc::off_t)?; - let ptr = mmap( - std::ptr::null_mut(), - size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, - 0, - )?; - shm_unlink(name)?; // Unlink immediately — fd keeps it alive - Ok(Self { ptr: ptr.cast(), len: size, fd: OwnedFd(fd) }) - } - } - - pub fn as_slice(&self) -> &[u8] { - // SAFETY: ptr is valid for len bytes and region outlives self - unsafe { std::slice::from_raw_parts(self.ptr, self.len) } - } -} - -impl Drop for ShmRegion { - fn drop(&mut self) { - // SAFETY: We own this mapping exclusively - unsafe { munmap(self.ptr.cast(), self.len) }; - // fd closed by OwnedFd::drop - } -} -``` - - - - -## Guardrails - -- **Always unlink shared memory immediately** -- Use `shm_unlink` as soon as the memory is mapped to ensure it is correctly cleaned up by the OS when the process exits. -- **Use RAII for all resources** -- Wrap pointers, file descriptors, and mapping handles in structs that implement `Drop` to prevent resource leaks on crash or error. -- **Align to page boundaries** -- Shared memory region sizes should always be a multiple of the system page size (typically 4096 bytes) for optimal mapping. 
-- **Capacity must be a power of two** -- For ring buffers, this allows for fast indexing using bitwise AND instead of expensive modulo operations. -- **Align headers to cache lines (64 bytes)** -- This prevents false sharing between producers and consumers on different CPU cores. - - - -## Validation Checkpoint - -- [ ] Shared memory is unlinked immediately after mapping -- [ ] RAII cleanup logic is implemented in `Drop` for all resources -- [ ] Ring buffer capacity is a power of two -- [ ] Headers are cache-line aligned (64 bytes) with explicit padding -- [ ] Bounds checks are performed on all reads and writes from shared memory -- [ ] Atomic memory ordering is correctly applied (`Acquire`/`Release`) - diff --git a/plugins/flow/skills/ipc/agents/openai.yaml b/plugins/flow/skills/ipc/agents/openai.yaml deleted file mode 100644 index fac19e7..0000000 --- a/plugins/flow/skills/ipc/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "IPC" - short_description: "Zero-copy IPC, shared memory, ring buffers, sync primitives, and notifications" diff --git a/plugins/flow/skills/makefile/SKILL.md b/plugins/flow/skills/makefile/SKILL.md deleted file mode 100644 index 40fb827..0000000 --- a/plugins/flow/skills/makefile/SKILL.md +++ /dev/null @@ -1,154 +0,0 @@ ---- -name: makefile -description: "Use when editing Makefile or GNUmakefile, adding development targets, wiring uv commands, defining .PHONY rules, creating self-documenting help, or fixing Make recipe safety." ---- - -# Makefile Skill - - - -## Overview - -All projects should use a consistent `Makefile` structure to ensure developer familiarity. The standard includes: - -- **Configuration**: `.ONESHELL`, `.EXPORT_ALL_VARIABLES`, strict shell flags. -- **Presentation**: Standard colors (`BLUE`, `GREEN`, `RED`, `YELLOW`) and icons (`ℹ`, `✓`, `⚠`, `✖`). -- **Help System**: Self-documenting `help` target parsing `##` comments. -- **Standard Targets**: `install`, `upgrade`, `clean`, `test`, `lint`. 
- -## Standard Template - -Copy this template to the root of new projects: - - - -```makefile -SHELL := /bin/bash -# ============================================================================= -# Variables -# ============================================================================= - -.DEFAULT_GOAL:=help -.ONESHELL: -.EXPORT_ALL_VARIABLES: -MAKEFLAGS += --no-print-directory - -# Silence output if VERBOSE is not set -ifndef VERBOSE -.SILENT: -endif - -# Define colors and formatting -BLUE := $(shell printf "\033[1;34m") -GREEN := $(shell printf "\033[1;32m") -RED := $(shell printf "\033[1;31m") -YELLOW := $(shell printf "\033[1;33m") -NC := $(shell printf "\033[0m") -INFO := $(shell printf "$(BLUE)ℹ$(NC)") -OK := $(shell printf "$(GREEN)✓$(NC)") -WARN := $(shell printf "$(YELLOW)⚠$(NC)") -ERROR := $(shell printf "$(RED)✖$(NC)") - -.PHONY: help -help: ## Display this help text for Makefile - @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z0-9_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) - - -# ============================================================================= -# Developer Utils -# ============================================================================= - -.PHONY: install -install: ## Install dependencies for local development - @echo "${INFO} Installing dependencies..." - @uv sync - @echo "${OK} Installation complete" - -.PHONY: upgrade -upgrade: ## Upgrade all dependencies - @echo "${INFO} Updating dependencies... 🔄" - @uv lock --upgrade - @uv run pre-commit autoupdate - @echo "${OK} Dependencies updated 🔄" - -.PHONY: clean -clean: ## Cleanup temporary build artifacts - @echo "${INFO} Cleaning working directory..." - @rm -rf .pytest_cache .ruff_cache build/ dist/ .coverage coverage.xml htmlcov/ - @find . -name '*.egg-info' -exec rm -rf {} + - @find . 
-name '__pycache__' -exec rm -rf {} + - @echo "${OK} Working directory cleaned" - -.PHONY: destroy -destroy: ## Destroy local environment - @echo "${INFO} Destroying environment... 🗑️" - @rm -rf .venv - @echo "${OK} Environment destroyed" - - -# ============================================================================= -# Quality & Testing -# ============================================================================= - -.PHONY: lint -lint: ## Run all linting checks - @echo "${INFO} Running linting... 🔍" - @uv run pre-commit run --all-files - @echo "${OK} Linting passed ✨" - -.PHONY: test -test: ## Run tests - @echo "${INFO} Running tests... 🧪" - @uv run pytest - @echo "${OK} Tests passed ✨" -``` - - - -## Best Practices - -1. **Emojis**: Use emojis consistent with the tool being used: - - 📦 Packaging/Install - - 🔄 Updates - - 🧹 Cleanup - - 🗑️ Destruction - - 🔍 Linting/Inspection - - 🧪 Testing - - ✨ Success - - 🚀 Execution/Server - - 📊 Analytics/Benchmarks - - 🦀 Rust - - 🐍 Python - -2. **Output**: Always use the `${INFO}`, `${OK}`, `${WARN}`, `${ERROR}` variables to prefix status messages. -3. **Silence**: Use `.SILENT:` (conditioned on `VERBOSE`) to keep the output clean for the user, revealing commands only when debugging. - - - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Bash](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/bash.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. 
- diff --git a/plugins/flow/skills/makefile/agents/openai.yaml b/plugins/flow/skills/makefile/agents/openai.yaml deleted file mode 100644 index d7c5c69..0000000 --- a/plugins/flow/skills/makefile/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Makefile" - short_description: "GNU Make targets, self-documenting help, uv workflows, and recipe safety" diff --git a/plugins/flow/skills/mojo-tools/SKILL.md b/plugins/flow/skills/mojo-tools/SKILL.md deleted file mode 100644 index 1f3fef8..0000000 --- a/plugins/flow/skills/mojo-tools/SKILL.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -name: mojo-tools -description: "Use when editing Mojo code, .mojo files, fire emoji files, SIMD kernels, Python-Mojo interop, GIL-free parallelism, C FFI, hatch-mojo build hooks, or packaging Mojo extensions." ---- - -# Mojo (Flow Tools) - - - -## 🚀 Official Modular Skills (Highly Recommended) - -For comprehensive support for modern Mojo syntax, project initialization, and GPU programming, we highly recommend installing the official Modular agent skills: - -- **mojo-syntax**: Overcomes agent misconceptions and ensures correct modern syntax. -- **new-modular-project**: Wizard for initializing Mojo/MAX projects with Pixi and UV. -- **mojo-python-interop**: Expert guidance for zero-copy Python interaction. -- **mojo-gpu-fundamentals**: Patterns for high-performance accelerator programming. - -**Installation:** - -```bash -npx skills add modular/skills -``` - -## Supplemental Patterns - -The patterns below focus on project integration and build hooks. - -### Hatch-Mojo Build Hook - -The `hatch-mojo` plugin allows seamless compilation of Mojo source files during the standard Python build process. 
- -**Key Configuration (`pyproject.toml`):** - -```toml -[tool.hatch.build.targets.wheel.hooks.mojo] -# Configuration for mojo compilation -``` - - - - -## Guardrails - -- **Explicitly define memory ownership** -- Use `owned`, `borrowed`, and `inout` to manage data flow and avoid unnecessary copies. -- **Use `SIMD` for performance-critical logic** -- Mojo excels at vectorization; always consider SIMD when processing large arrays. -- **Verify data alignment** -- Ensure pointers are aligned for the target architecture, especially when using SIMD operations. - - - -## Validation Checkpoint - -- [ ] Explicit ownership markers are correctly applied to arguments -- [ ] SIMD vectorization is implemented where applicable -- [ ] Memory safety is verified through ownership and borrowing checks - - - -## Hatch-Mojo Build Hook Example - -```toml -[tool.hatch.build.targets.wheel.hooks.mojo] -dependencies = ["hatch-mojo"] -path = "src/my_extension.mojo" -``` - - diff --git a/plugins/flow/skills/mojo-tools/agents/openai.yaml b/plugins/flow/skills/mojo-tools/agents/openai.yaml deleted file mode 100644 index e611325..0000000 --- a/plugins/flow/skills/mojo-tools/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Mojo Tools" - short_description: "Flow supplemental Mojo patterns for SIMD, interop, FFI, packaging, and tests" diff --git a/plugins/flow/skills/mojo-tools/references/build.md b/plugins/flow/skills/mojo-tools/references/build.md deleted file mode 100644 index 8b5de62..0000000 --- a/plugins/flow/skills/mojo-tools/references/build.md +++ /dev/null @@ -1,36 +0,0 @@ -# Build System (hatch-mojo) - -Use `hatch-mojo` to compile Mojo sources into Python extensions or shared libraries. 
- -## pyproject.toml Setup - -```toml -[build-system] -build-backend = "hatchling.build" -requires = ["hatchling", "hatch-mojo"] - -[[tool.hatch.build.targets.wheel.hooks.mojo.jobs]] -name = "core" -input = "src/mo/my_pkg/core.mojo" -emit = "python-extension" -module = "my_pkg._core" -include-dirs = ["src/mo"] -``` - -## Job Types (emit) - -| `emit` Value | Output | Use Case | -|--------------|--------|----------| -| `python-extension` | `.so`/`.pyd` | Mojo kernels callable from Python | -| `shared-lib` | `.so` | Shared library | -| `executable` | Binary | Standalone CLI tools | - -## Manual Compilation - -```bash -# Build shared library -mojo build --emit shared-lib src/mo/module.mojo -o src/py/package/_module.so - -# Build standalone binary -mojo build src/mo/main.mojo -o dist/main -``` diff --git a/plugins/flow/skills/mojo-tools/references/ffi.md b/plugins/flow/skills/mojo-tools/references/ffi.md deleted file mode 100644 index c6c4fcc..0000000 --- a/plugins/flow/skills/mojo-tools/references/ffi.md +++ /dev/null @@ -1,50 +0,0 @@ -# C FFI (Foreign Function Interface) - -## Static External Calls - -Use `external_call` for compile-time linked C functions (e.g., libc). - -```mojo -from sys.ffi import external_call - -fn get_time() -> Float64: - return external_call["clock", Float64]() -``` - -## Dynamic Library Loading (DLHandle) - -Load shared libraries at runtime. 
- -```mojo -from sys.ffi import DLHandle - -fn load_custom_library(): - var lib = DLHandle("./libcustom_ops.so") - var compute_fn = lib.get_function[fn (UnsafePointer[Float32], Int) -> Float32]("custom_compute") - - var data = UnsafePointer[Float32].alloc(1024) - var result = compute_fn(data, 1024) - data.free() -``` - -## C Struct Mapping - -```mojo -@register_passable("trivial") -struct CTimeSpec: - var tv_sec: Int64 - var tv_nsec: Int64 - - fn __init__(out self): - self.tv_sec = 0 - self.tv_nsec = 0 -``` - -## Type Mapping - -| C Type | Mojo Type | -|--------|-----------| -| `int` / `int32_t` | `Int32` | -| `long` / `int64_t` | `Int64` | -| `float` | `Float32` | -| `void*` | `UnsafePointer[NoneType]` | diff --git a/plugins/flow/skills/mojo-tools/references/hatch_mojo.md b/plugins/flow/skills/mojo-tools/references/hatch_mojo.md deleted file mode 100644 index ce226fd..0000000 --- a/plugins/flow/skills/mojo-tools/references/hatch_mojo.md +++ /dev/null @@ -1,185 +0,0 @@ -# hatch-mojo Usage Guide - -`hatch-mojo` is a Hatch build hook that compiles `.mojo` sources during Python package builds. - -`hatch-mojo` is available at `https://github.com/cofin/hatch-mojo`. - -## 1. Prerequisites - -1. Install Mojo toolchain and ensure `mojo` is runnable. -2. Use a Hatch/Hatchling build backend project (`pyproject.toml`). -3. Add `hatch-mojo` as a build dependency. - -## 2. Install - -```bash -uv add hatch-mojo -``` - -## 3. Minimal `pyproject.toml` setup - -```toml -[build-system] -build-backend = "hatchling.build" -requires = ["hatchling", "hatch-mojo"] - -[tool.hatch.build.targets.wheel.hooks.mojo] -targets = ["wheel"] - -[[tool.hatch.build.targets.wheel.hooks.mojo.jobs]] -name = "core" -input = "src/mo/my_pkg/core.mojo" -emit = "python-extension" -module = "my_pkg._core" -include-dirs = ["src/mo"] -``` - -Build: - -```bash -hatch build -t wheel -``` - -## 4. 
Global hook options - -```toml -[tool.hatch.build.targets.wheel.hooks.mojo] -mojo-bin = "/opt/mojo/bin/mojo" # or use HATCH_MOJO_BIN env var -parallel = true -fail-fast = true -clean-before-build = false -clean-after-build = false -skip-editable = true -build-dir = "build/mojo" -targets = ["wheel"] -include = ["src/mo/**/*.mojo"] -exclude = ["**/experimental*.mojo"] -bundle-libs = true -``` - -## 5. Profiles and multiple jobs - -Profiles reduce duplication across compile jobs. - -```toml -[tool.hatch.build.targets.wheel.hooks.mojo.profiles.default] -include-dirs = ["src/mo"] -flags = ["-I", "vendor/include"] - -[[tool.hatch.build.targets.wheel.hooks.mojo.jobs]] -name = "core" -profiles = ["default"] -input = "src/mo/my_pkg/core.mojo" -emit = "python-extension" -module = "my_pkg._core" - -[[tool.hatch.build.targets.wheel.hooks.mojo.jobs]] -name = "cli" -profiles = ["default"] -input = "src/mo/my_pkg/cli.mojo" -emit = "executable" -install = { kind = "scripts", path = "my-cli" } -depends-on = ["core"] -``` - -For non-Python artifacts (`shared-lib`, `static-lib`, `object`, `executable`), set `install = { kind, path }`. - -## 6. Runtime library bundling (`bundle-libs`) - -Set: - -```toml -[tool.hatch.build.targets.wheel.hooks.mojo] -bundle-libs = true -``` - -What it does: - -1. Bundles Mojo runtime libs into the wheel. -2. Linux: updates RPATH (requires `patchelf`). -3. macOS: rewrites dylib references (`install_name_tool`). -4. Adds `NOTICE.mojo-runtime`; copies SDK license if present. - -## 7. 
cibuildwheel notes - -### Linux (manylinux) - -If your Mojo SDK requires a newer glibc/libstdc++ baseline, use retagging flow: - -```toml -[tool.cibuildwheel.linux] -repair-wheel-command = "python -m wheel tags --remove --platform-tag manylinux_2_34_x86_64 {wheel} && mv {wheel} {dest_dir}" -``` - -### macOS - -Use standard `delocate`: - -```toml -[tool.cibuildwheel.macos] -repair-wheel-command = "delocate-wheel -w {dest_dir} {wheel}" -``` - -If needed, pass `DYLD_LIBRARY_PATH` inline in the repair command (SIP may strip it from environment inheritance). - -## 8. Common errors - -1. `mojo executable not found` -Solution: set `mojo-bin`, export `HATCH_MOJO_BIN`, or add `mojo` to `PATH`. - -2. `No build jobs resolved` -Solution: verify `input` paths, `include`/`exclude` globs, and target matching. - -3. Non-Python output missing from wheel -Solution: add `install = { kind, path }` on non-extension jobs. - -4. Python extension not importable -Solution: verify `module` matches final package import path (e.g., `my_pkg._core`). - -## 9. Deployment - -### Publishing to PyPI - -Use `uv publish` for publishing completed wheels. Current standards prefer OIDC authentication. - ---- - -## 10. CI/CD Actions - -Example GitHub Actions workflow utilizing `cibuildwheel` to compile multi-platform wheels support: - -```yaml -name: Build Wheels -on: [push, pull_request] - -jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - - steps: - - uses: actions/checkout@v4 - - - name: Install Mojo - run: | - curl -s https://get.modular.com | sh - - modular install mojo - echo "$HOME/.modular/pkg/packages.modular.com_mojo/bin" >> $GITHUB_PATH - - - name: Build and test wheels - uses: pypa/cibuildwheel@v2.22 - env: - CIBW_BEFORE_BUILD: "pip install hatchling hatch-mojo" - # Apply platform-conditional repair commands from section 7 -``` - -## 11. Where to learn more - -1. 
Library repo: `https://github.com/cofin/hatch-mojo` -2. Project README: `https://github.com/cofin/hatch-mojo#readme` -3. Hatch build hooks: `https://hatch.pypa.io/` -4. Mojo docs: `https://docs.modular.com/mojo/` -5. cibuildwheel docs: `https://cibuildwheel.pypa.io/` diff --git a/plugins/flow/skills/mojo-tools/references/interop.md b/plugins/flow/skills/mojo-tools/references/interop.md deleted file mode 100644 index 3c6a4f6..0000000 --- a/plugins/flow/skills/mojo-tools/references/interop.md +++ /dev/null @@ -1,42 +0,0 @@ -# Zero-Copy Python Interop - -## Via __array_interface__ - -Exchange data with NumPy without copying. - -```mojo -fn from_numpy(np_array: PythonObject) -> Tensor[DType.float32]: - // Extract raw pointer from NumPy's array interface - let interface = np_array.__array_interface__ - let data_ptr = interface["data"][0].to_int() - let shape = interface["shape"] - - // SAFETY: np_array must remain alive while this tensor exists. - let ptr = UnsafePointer[Float32](address=data_ptr) - return Tensor[DType.float32](ptr, shape[0].to_int()) -``` - -## Returning Data to Python - -```mojo -fn to_numpy(tensor: Tensor[DType.float32]) -> PythonObject: - let np = Python.import_module("numpy") - return np.frombuffer( - tensor.data().as_bytes(), - dtype=np.float32 - ).reshape(tensor.shape()) -``` - -## Python C-API Extensions - -### Module Entry Point - -```mojo -from python.module import PythonModuleBuilder - -@export -fn PyInit_my_module() -> PythonObject: - var builder = PythonModuleBuilder("my_module") - builder.add_function("dot_product", dot_product_wrapper) - return builder.build() -``` diff --git a/plugins/flow/skills/mojo-tools/references/performance.md b/plugins/flow/skills/mojo-tools/references/performance.md deleted file mode 100644 index f3f469a..0000000 --- a/plugins/flow/skills/mojo-tools/references/performance.md +++ /dev/null @@ -1,47 +0,0 @@ -# Mojo Performance Optimization - -## SIMD-First Vectorization - -Replace scalar loops with SIMD operations 
for hardware acceleration. - -```mojo -from algorithm import vectorize - -fn relu_simd(inout tensor: Tensor[DType.float32]): - alias simd_width = simdwidthof[DType.float32]() - let zero = SIMD[DType.float32, simd_width](0) - - @parameter - fn _relu[width: Int](idx: Int): - let val = tensor.load[width=width](idx) - tensor.store(idx, val.max(zero)) - - vectorize[_relu, simd_width](tensor.num_elements()) -``` - -**Rules:** - -- Use `simdwidthof` to auto-detect hardware SIMD width. -- Use `@parameter` for compile-time loop specialization. - ---- - -## GIL-Free Parallelism - -True multi-core scaling without Python's GIL. - -```mojo -from algorithm import parallelize - -fn parallel_transform(inout data: Tensor[DType.float32], num_workers: Int): - let chunk_size = data.num_elements() // num_workers - - @parameter - fn _worker(worker_id: Int): - let start = worker_id * chunk_size - let end = min(start + chunk_size, data.num_elements()) - for i in range(start, end): - data[i] = expensive_compute(data[i]) - - parallelize[_worker](num_workers) -``` diff --git a/plugins/flow/skills/mojo-tools/references/testing.md b/plugins/flow/skills/mojo-tools/references/testing.md deleted file mode 100644 index eb02f59..0000000 --- a/plugins/flow/skills/mojo-tools/references/testing.md +++ /dev/null @@ -1,38 +0,0 @@ -# Testing Mojo - -## Mojo Unit Tests - -```mojo -fn test_dot_product(): - let a = SIMD[DType.float32, 4](1.0, 2.0, 3.0, 4.0) - let b = SIMD[DType.float32, 4](5.0, 6.0, 7.0, 8.0) - let result = dot_product(a, b) - assert_almost_equal(result, 70.0) -``` - -Run tests via CLI: - -```bash -mojo test src/mo/tests/ -``` - -## Python Boundary Tests - -Verify that the Mojo extension works correctly when called from Python. 
- -```python -import numpy as np -from my_package._my_module import dot_product - -def test_dot_product(): - a = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32) - b = np.array([5.0, 6.0, 7.0, 8.0], dtype=np.float32) - result = dot_product(a, b) - np.testing.assert_almost_equal(result, 70.0) -``` - -Run via pytest: - -```bash -uv run pytest tests/ -``` diff --git a/plugins/flow/skills/mysql/SKILL.md b/plugins/flow/skills/mysql/SKILL.md deleted file mode 100644 index 2f6e0f4..0000000 --- a/plugins/flow/skills/mysql/SKILL.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -name: mysql -description: "Use when writing MySQL or MariaDB SQL, editing MySQL-flavored .sql files, using mysql CLI, mysqldump, connection strings, InnoDB settings, replication, stored procedures, JSON, or query tuning." ---- - -# MySQL / MariaDB - -MySQL is the world's most popular open-source relational database, powering applications from small web apps to large-scale internet services. This skill covers MySQL 8.0+ (and MariaDB where noted). 
- -## Quick Reference - -### Connection Patterns - -```python -# Python (PyMySQL) -- always parameterized, always utf8mb4 -import pymysql - -conn = pymysql.connect( - host="localhost", - user="app_user", - password="secret", - database="mydb", - charset="utf8mb4", - cursorclass=pymysql.cursors.DictCursor, -) - -with conn: - with conn.cursor() as cursor: - cursor.execute("SELECT * FROM users WHERE id = %s", (42,)) - user = cursor.fetchone() - conn.commit() -``` - -### Key SQL Patterns - -```sql --- CTE (8.0+) -WITH active_users AS ( - SELECT id, name FROM users WHERE status = 'active' -) -SELECT au.name, COUNT(o.id) AS order_count - FROM active_users au - JOIN orders o ON o.user_id = au.id - GROUP BY au.name; - --- Window function -SELECT customer_id, order_date, total, - ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS rn - FROM orders; - --- Upsert -INSERT INTO counters (key_name, value) -VALUES ('page_views', 1) -ON DUPLICATE KEY UPDATE value = value + VALUES(value); -``` - -### InnoDB Essentials - -- **Clustered index** -- the primary key IS the table; rows stored in PK order. -- **Secondary index lookup** -- two B+tree traversals (secondary -> PK -> row). -- **Sequential PKs** (AUTO_INCREMENT) are fast; random PKs (UUIDs) cause page splits. -- **UUID workaround** -- use `UUID_TO_BIN(UUID(), 1)` for ordered UUIDs in MySQL 8.0+. -- **Row format** -- DYNAMIC (default in 8.0+) is the best general-purpose choice. -- **Buffer pool** -- size to ~70-80% of available RAM on dedicated servers. - - - -## Workflow - -### Step 1: Schema Design - -Choose InnoDB (always). Use AUTO_INCREMENT integer PKs unless UUIDs are required (then use ordered UUID v7). Set `CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci` at the database and table level. - -### Step 2: Write Queries - -Use parameterized queries in application code -- never string interpolation. Use CTEs for readability. Use window functions instead of self-joins for ranking/running totals. 
- -### Step 3: Index Strategy - -Create indexes to support WHERE, JOIN, and ORDER BY clauses. Use composite indexes following the leftmost-prefix rule. Check coverage with `EXPLAIN`. - -### Step 4: Performance Tuning - -Run `EXPLAIN ANALYZE` on slow queries. Check the slow query log (`long_query_time = 1`). Tune buffer pool size, redo log size, and `innodb_flush_log_at_trx_commit` for the workload. - -### Step 5: Validate - -Confirm query plans use indexes (no unexpected full table scans). Verify `utf8mb4` encoding. Test with realistic data volumes. - - - - - -## Guardrails - -- **Always use parameterized queries** -- never concatenate user input into SQL strings. Use `%s` placeholders (Python) or `?` (Node/Java). -- **InnoDB by default** -- never use MyISAM for new tables. InnoDB provides transactions, row-level locking, and crash recovery. -- **utf8mb4 encoding** -- always specify `charset=utf8mb4` in connections and `CHARACTER SET utf8mb4` in DDL. Plain `utf8` is a 3-byte subset that cannot store emoji or some CJK characters. -- **Avoid SELECT \*** -- name columns explicitly to prevent breakage when schema changes and to enable covering indexes. -- **AUTO_INCREMENT for PKs** -- avoids clustered index fragmentation. If UUIDs are required, use `UUID_TO_BIN(UUID(), 1)` for ordered storage. -- **Test with EXPLAIN before deploying** -- verify index usage and join strategies on production-like data. - - - - - -### Validation Checkpoint - -Before delivering MySQL code, verify: - -- [ ] All queries use parameterized placeholders (no string interpolation) -- [ ] Tables use InnoDB engine -- [ ] Character set is utf8mb4 (not utf8 or latin1) -- [ ] Primary keys are defined (AUTO_INCREMENT or ordered UUID) -- [ ] Indexes exist for WHERE/JOIN/ORDER BY columns -- [ ] EXPLAIN output shows index usage for critical queries - - - - - -## Example - -**Task:** Parameterized query with index creation for an orders lookup. 
- -```sql --- Create table with proper encoding and engine -CREATE TABLE orders ( - id BIGINT AUTO_INCREMENT PRIMARY KEY, - user_id BIGINT NOT NULL, - status ENUM('pending', 'shipped', 'delivered', 'cancelled') NOT NULL DEFAULT 'pending', - total DECIMAL(10, 2) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, - INDEX idx_orders_user_status (user_id, status), - INDEX idx_orders_created (created_at) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; - --- Verify index usage -EXPLAIN SELECT id, total, created_at - FROM orders - WHERE user_id = 42 - AND status = 'shipped' - ORDER BY created_at DESC - LIMIT 20; -``` - -```python -# Application code -- parameterized query -async def get_user_orders(conn, user_id: int, status: str) -> list[dict]: - async with conn.cursor() as cursor: - await cursor.execute( - "SELECT id, total, created_at FROM orders " - "WHERE user_id = %s AND status = %s " - "ORDER BY created_at DESC LIMIT 20", - (user_id, status), - ) - return await cursor.fetchall() -``` - - - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[SQL Patterns](references/sql_patterns.md)** -- Window functions, CTEs, recursive queries, JSON_TABLE, upserts, generated columns. -- **[Stored Procedures & Functions](references/stored_procedures.md)** -- CREATE PROCEDURE/FUNCTION, control flow, cursors, error handling, triggers. -- **[Performance Tuning](references/performance.md)** -- EXPLAIN/EXPLAIN ANALYZE, index strategies, slow query log, buffer pool tuning. -- **[Connection Patterns](references/connections.md)** -- Python, Node.js, Java, Go connectors; connection pooling; SSL/TLS. -- **[JSON in MySQL](references/json.md)** -- JSON data type, extraction operators, JSON_TABLE, multi-valued indexes. -- **[InnoDB Internals](references/innodb.md)** -- Clustered index, row formats, buffer pool, redo log, MVCC, deadlock detection. 
-- **[Security](references/security.md)** -- User/role management, authentication plugins, SSL/TLS, encryption at rest. -- **[Administration](references/admin.md)** -- Backups (mysqldump, XtraBackup), binary logs, PITR, table maintenance, upgrades. -- **[Replication & HA](references/replication.md)** -- Binary log replication, GTID, Group Replication, InnoDB Cluster. -- **[MySQL CLI & Tools](references/mysql_cli.md)** -- mysql client, mycli, MySQL Shell, Percona Toolkit, gh-ost. - ---- - -## Official References - -- MySQL 8.0 Reference Manual: -- MySQL 8.4 Reference Manual: -- MariaDB Knowledge Base: -- MySQL Shell User Guide: -- Percona Toolkit: - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [MySQL/MariaDB](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/mysql_mariadb.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/mysql/agents/openai.yaml b/plugins/flow/skills/mysql/agents/openai.yaml deleted file mode 100644 index 0b69d6d..0000000 --- a/plugins/flow/skills/mysql/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "MySQL" - short_description: "MySQL and MariaDB SQL, CLI, InnoDB, replication, security, and performance" diff --git a/plugins/flow/skills/mysql/references/admin.md b/plugins/flow/skills/mysql/references/admin.md deleted file mode 100644 index ff8f791..0000000 --- a/plugins/flow/skills/mysql/references/admin.md +++ /dev/null @@ -1,352 +0,0 @@ -# Administration - -## Overview - -This reference covers MySQL backup and restore strategies, binary log management, point-in-time recovery, table maintenance, character sets, and upgrade procedures. 
- ---- - -## Logical Backups - -### mysqldump - -```bash -# Full database backup with consistent snapshot (InnoDB only). -mysqldump --single-transaction --routines --triggers --events \ - --set-gtid-purged=OFF -u root -p mydb > mydb_backup.sql - -# All databases. -mysqldump --single-transaction --all-databases --routines --triggers --events \ - -u root -p > full_backup.sql - -# Specific tables. -mysqldump --single-transaction -u root -p mydb users orders > tables_backup.sql - -# Compressed backup. -mysqldump --single-transaction -u root -p mydb | gzip > mydb_backup.sql.gz - -# Schema only (no data). -mysqldump --no-data -u root -p mydb > schema_only.sql - -# Data only (no CREATE TABLE statements). -mysqldump --no-create-info -u root -p mydb > data_only.sql - -# Restore from dump. -mysql -u root -p mydb < mydb_backup.sql -gunzip < mydb_backup.sql.gz | mysql -u root -p mydb -``` - -**Key flags:** - -- `--single-transaction`: Uses a consistent snapshot for InnoDB (no table locks). -- `--routines`: Include stored procedures and functions. -- `--triggers`: Include triggers (on by default since 5.7). -- `--events`: Include scheduled events. -- `--set-gtid-purged=OFF`: Omit GTID info if you do not need replication consistency. -- `--master-data=2`: Record binary log position as a comment (useful for setting up replicas). - -### mysqlpump (Parallel Dump) - -```bash -# Parallel logical dump with compression. -mysqlpump --default-parallelism=4 --compress-output=zlib \ - -u root -p --databases mydb > mydb_pump.zlib - -# Exclude specific tables. -mysqlpump -u root -p --databases mydb \ - --exclude-tables=audit_log,temp_data > mydb_filtered.sql -``` - ---- - -## MySQL Shell Dump/Load Utilities - -MySQL Shell provides high-performance parallel dump and load utilities. - -```bash -# Dump an entire instance (parallel, chunked). -mysqlsh root@localhost -- util dumpInstance /backup/full \ - --threads=8 --compression=zstd - -# Dump specific schemas. 
-mysqlsh root@localhost -- util dumpSchemas mydb,analytics /backup/schemas \ - --threads=8 - -# Dump specific tables. -mysqlsh root@localhost -- util dumpTables mydb users,orders /backup/tables \ - --threads=8 - -# Parallel restore (much faster than mysql < dump.sql). -mysqlsh root@localhost -- util loadDump /backup/full \ - --threads=8 --deferTableIndexes=all --resetProgress - -# Load into a different schema. -mysqlsh root@localhost -- util loadDump /backup/schemas \ - --schema=mydb_staging --threads=8 -``` - -**Advantages over mysqldump:** parallel export/import, chunked tables, resumable loads, progress tracking, zstd compression. - ---- - -## Physical Backups: Percona XtraBackup - -```bash -# Full backup (hot, non-blocking for InnoDB). -xtrabackup --backup --target-dir=/backup/full \ - --user=root --password=secret - -# Prepare the backup (apply redo log). -xtrabackup --prepare --target-dir=/backup/full - -# Restore: stop MySQL, copy files, fix ownership, start. -systemctl stop mysql -xtrabackup --copy-back --target-dir=/backup/full -chown -R mysql:mysql /var/lib/mysql -systemctl start mysql - -# Incremental backup. -xtrabackup --backup --target-dir=/backup/inc1 \ - --incremental-basedir=/backup/full \ - --user=root --password=secret - -# Prepare incremental (apply to base). -xtrabackup --prepare --apply-log-only --target-dir=/backup/full -xtrabackup --prepare --target-dir=/backup/full \ - --incremental-dir=/backup/inc1 - -# Streaming to another server. -xtrabackup --backup --stream=xbstream --user=root --password=secret \ - | ssh backup-server "xbstream -x -C /backup/full" -``` - ---- - -## Binary Log Management - -### Configuration - -```sql --- Enable binary logging (required for replication and PITR). --- In my.cnf: --- log-bin = mysql-bin --- binlog_format = ROW -- ROW is default and recommended in 8.0+ --- binlog_expire_logs_seconds = 604800 -- 7 days (replaces expire_logs_days) --- max_binlog_size = 100M - --- Check binary log status. 
-SHOW BINARY LOGS; -SHOW MASTER STATUS; -- current binary log file and position -SHOW BINLOG EVENTS IN 'mysql-bin.000042' LIMIT 20; -``` - -### Purging Binary Logs - -```sql --- Purge logs older than a specific date. -PURGE BINARY LOGS BEFORE '2026-03-20 00:00:00'; - --- Purge up to a specific log file. -PURGE BINARY LOGS TO 'mysql-bin.000040'; - --- Automatic purge via binlog_expire_logs_seconds (preferred). -SET GLOBAL binlog_expire_logs_seconds = 604800; -- 7 days -``` - -### mysqlbinlog Utility - -```bash -# View binary log contents in human-readable form. -mysqlbinlog mysql-bin.000042 - -# Filter by time range. -mysqlbinlog --start-datetime="2026-03-25 14:00:00" \ - --stop-datetime="2026-03-25 15:00:00" \ - mysql-bin.000042 - -# Filter by position. -mysqlbinlog --start-position=12345 --stop-position=67890 mysql-bin.000042 - -# Decode ROW format events (show actual SQL-like statements). -mysqlbinlog --verbose mysql-bin.000042 - -# Replay binary log for PITR (pipe to mysql). -mysqlbinlog --start-datetime="2026-03-25 14:00:00" \ - --stop-datetime="2026-03-25 14:59:59" \ - mysql-bin.000042 mysql-bin.000043 | mysql -u root -p -``` - -### Point-in-Time Recovery (PITR) - -```bash -# 1. Restore from the most recent full backup. -mysql -u root -p mydb < full_backup_20260325.sql - -# 2. Replay binary logs from backup position to just before the disaster. -mysqlbinlog --start-position=154 \ - --stop-datetime="2026-03-25 14:58:00" \ - mysql-bin.000042 mysql-bin.000043 | mysql -u root -p - -# With GTID-based restore: -mysqlbinlog --include-gtids="server-uuid:1-1000" \ - --exclude-gtids="server-uuid:500" \ - mysql-bin.000042 | mysql -u root -p -``` - ---- - -## Table Maintenance - -### OPTIMIZE TABLE - -```sql --- Reclaims unused space and defragments the data file. --- For InnoDB, this performs ALTER TABLE ... FORCE (rebuilds the table). --- Blocks writes during operation; use pt-online-schema-change for large tables. 
-OPTIMIZE TABLE orders; -``` - -### ANALYZE TABLE - -```sql --- Updates index statistics used by the query optimizer. --- Lightweight operation; safe to run regularly. -ANALYZE TABLE orders, customers, products; -``` - -### CHECK TABLE - -```sql --- Verifies table integrity. -CHECK TABLE orders; -CHECK TABLE orders EXTENDED; -- deeper check, slower -``` - -### REPAIR TABLE - -```sql --- Repairs corrupted MyISAM tables (does NOT work for InnoDB). -REPAIR TABLE myisam_table; - --- For InnoDB, dump and reimport, or use innodb_force_recovery. -``` - ---- - -## Character Sets and Collations - -### UTF-8 Configuration - -```sql --- Always use utf8mb4 (true UTF-8, supports emojis and all Unicode). --- utf8 in MySQL is an alias for utf8mb3 (3-byte, does NOT support supplementary characters). - --- Server-level defaults (my.cnf): --- character-set-server = utf8mb4 --- collation-server = utf8mb4_0900_ai_ci -- MySQL 8.0 default - --- Check current settings. -SHOW VARIABLES LIKE 'character_set%'; -SHOW VARIABLES LIKE 'collation%'; -``` - -### Collation Selection - -| Collation | Case | Accent | Notes | -|---|---|---|---| -| `utf8mb4_0900_ai_ci` | Insensitive | Insensitive | MySQL 8.0 default, Unicode 9.0 | -| `utf8mb4_0900_as_cs` | Sensitive | Sensitive | Exact matching | -| `utf8mb4_bin` | Sensitive | Sensitive | Binary comparison, fastest | -| `utf8mb4_general_ci` | Insensitive | Insensitive | Legacy, less accurate than 0900 | -| `utf8mb4_unicode_ci` | Insensitive | Insensitive | Legacy Unicode collation | - -```sql --- Set collation per column for mixed requirements. -CREATE TABLE products ( - id INT PRIMARY KEY, - name VARCHAR(200) COLLATE utf8mb4_0900_ai_ci, -- case-insensitive search - sku VARCHAR(50) COLLATE utf8mb4_bin -- exact match -); - --- Convert existing table. -ALTER TABLE products CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci; -``` - -### Connection Character Set - -```sql --- Ensure the connection uses utf8mb4. 
-SET NAMES utf8mb4; - --- Or set all three variables individually. -SET character_set_client = utf8mb4; -SET character_set_connection = utf8mb4; -SET character_set_results = utf8mb4; -``` - ---- - -## Upgrade Patterns - -### In-Place Upgrade (Recommended for Minor Versions) - -```bash -# 1. Backup first. -mysqldump --all-databases --routines --triggers --events \ - --single-transaction > pre_upgrade_backup.sql - -# 2. Stop MySQL. -systemctl stop mysql - -# 3. Install new binaries (package manager or binary tarball). -apt-get install mysql-server # or yum, dnf, etc. - -# 4. Start MySQL (automatic upgrade runs on startup in 8.0.16+). -systemctl start mysql - -# 5. Verify. -mysql -u root -p -e "SELECT VERSION();" -``` - -### Logical Upgrade (Major Version Changes, e.g., 5.7 -> 8.0) - -```bash -# 1. Dump from old version. -mysqldump --all-databases --routines --triggers --events \ - --single-transaction --set-gtid-purged=OFF > dump_57.sql - -# 2. Install new MySQL version on target server. - -# 3. Load the dump. -mysql -u root -p < dump_57.sql - -# 4. Run mysql_upgrade (if < 8.0.16; automatic in 8.0.16+). -mysql_upgrade -u root -p - -# 5. Restart MySQL. -systemctl restart mysql -``` - -### Pre-Upgrade Checklist - -```bash -# MySQL Shell upgrade checker (run on the OLD server). -mysqlsh root@localhost -- util checkForServerUpgrade - -# Checks for: -# - Deprecated features (utf8mb3, mysql_native_password, etc.) 
-# - Incompatible SQL modes -# - Reserved keywords used as identifiers -# - Removed system variables -``` - ---- - -## Official References - -- mysqldump: -- MySQL Shell Dump Utilities: -- Percona XtraBackup: -- Binary Log: -- Character Sets: -- Upgrading: diff --git a/plugins/flow/skills/mysql/references/connections.md b/plugins/flow/skills/mysql/references/connections.md deleted file mode 100644 index 92ba407..0000000 --- a/plugins/flow/skills/mysql/references/connections.md +++ /dev/null @@ -1,413 +0,0 @@ -# Connection Patterns - -## Overview - -This reference covers MySQL connection patterns across popular languages, connection pooling strategies, and SSL/TLS authentication configuration. - ---- - -## Python - -### mysql-connector-python (Official Oracle Driver) - -```python -import mysql.connector - -# Basic connection. -conn = mysql.connector.connect( - host="localhost", - port=3306, - user="app_user", - password="secret", - database="mydb", - charset="utf8mb4", - collation="utf8mb4_unicode_ci", - autocommit=False, -) - -cursor = conn.cursor(dictionary=True) # returns dicts instead of tuples -cursor.execute("SELECT id, name FROM users WHERE status = %s", ("active",)) -rows = cursor.fetchall() - -conn.commit() -cursor.close() -conn.close() -``` - -### PyMySQL - -```python -import pymysql - -# Connection with context manager. 
-conn = pymysql.connect( - host="localhost", - user="app_user", - password="secret", - database="mydb", - charset="utf8mb4", - cursorclass=pymysql.cursors.DictCursor, -) - -with conn: - with conn.cursor() as cursor: - cursor.execute("SELECT * FROM users WHERE id = %s", (42,)) - user = cursor.fetchone() - conn.commit() -``` - -### asyncmy (Async) - -```python -import asyncio -import asyncmy - -async def main(): - conn = await asyncmy.connect( - host="localhost", user="app_user", password="secret", - db="mydb", charset="utf8mb4", - ) - async with conn.cursor(asyncmy.cursors.DictCursor) as cursor: - await cursor.execute("SELECT * FROM users WHERE status = %s", ("active",)) - rows = await cursor.fetchall() - conn.close() - -asyncio.run(main()) -``` - -### SQLAlchemy Integration - -```python -from sqlalchemy import create_engine -from sqlalchemy.orm import Session - -# mysql-connector-python backend. -engine = create_engine( - "mysql+mysqlconnector://app_user:secret@localhost:3306/mydb", - pool_size=10, - max_overflow=20, - pool_recycle=3600, # recycle connections after 1 hour - pool_pre_ping=True, # test connection liveness before use - echo=False, -) - -# PyMySQL backend. -engine = create_engine("mysql+pymysql://app_user:secret@localhost:3306/mydb") - -with Session(engine) as session: - result = session.execute(text("SELECT * FROM users WHERE id = :id"), {"id": 42}) - user = result.mappings().one_or_none() -``` - ---- - -## Node.js (mysql2) - -### Basic Connection with Promises - -```javascript -import mysql from 'mysql2/promise'; - -const conn = await mysql.createConnection({ - host: 'localhost', - user: 'app_user', - password: 'secret', - database: 'mydb', - charset: 'utf8mb4', -}); - -// Prepared statement (uses binary protocol, prevents SQL injection). 
-const [rows] = await conn.execute('SELECT * FROM users WHERE id = ?', [42]); -console.log(rows); - -await conn.end(); -``` - -### Connection Pool - -```javascript -import mysql from 'mysql2/promise'; - -const pool = mysql.createPool({ - host: 'localhost', - user: 'app_user', - password: 'secret', - database: 'mydb', - waitForConnections: true, - connectionLimit: 20, - maxIdle: 10, - idleTimeout: 60000, - queueLimit: 0, - enableKeepAlive: true, - keepAliveInitialDelay: 10000, -}); - -// Pool automatically manages connection lifecycle. -const [rows] = await pool.execute('SELECT * FROM orders WHERE customer_id = ?', [42]); - -// Transaction with pool connection. -const conn = await pool.getConnection(); -try { - await conn.beginTransaction(); - await conn.execute('INSERT INTO orders (customer_id, total) VALUES (?, ?)', [42, 99.99]); - await conn.execute('UPDATE inventory SET qty = qty - 1 WHERE sku = ?', ['ABC']); - await conn.commit(); -} catch (err) { - await conn.rollback(); - throw err; -} finally { - conn.release(); // return to pool, do NOT call conn.end() -} -``` - -### Streaming Large Result Sets - -```javascript -// Use queryStream for large result sets to avoid loading everything into memory. 
-const stream = pool.pool.query('SELECT * FROM large_table').stream(); - -stream.on('data', (row) => { - // process row -}); -stream.on('end', () => { - console.log('Done'); -}); -``` - ---- - -## Java - -### JDBC Direct Connection - -```java -import java.sql.*; - -String url = "jdbc:mysql://localhost:3306/mydb?useSSL=true&serverTimezone=UTC&characterEncoding=utf8mb4"; -try (Connection conn = DriverManager.getConnection(url, "app_user", "secret"); - PreparedStatement ps = conn.prepareStatement("SELECT id, name FROM users WHERE status = ?")) { - ps.setString(1, "active"); - try (ResultSet rs = ps.executeQuery()) { - while (rs.next()) { - System.out.println(rs.getInt("id") + ": " + rs.getString("name")); - } - } -} -``` - -### HikariCP Connection Pool - -```java -import com.zaxxer.hikari.HikariConfig; -import com.zaxxer.hikari.HikariDataSource; - -HikariConfig config = new HikariConfig(); -config.setJdbcUrl("jdbc:mysql://localhost:3306/mydb"); -config.setUsername("app_user"); -config.setPassword("secret"); -config.setMaximumPoolSize(20); -config.setMinimumIdle(5); -config.setIdleTimeout(300000); // 5 minutes -config.setConnectionTimeout(10000); // 10 seconds -config.setMaxLifetime(1800000); // 30 minutes -config.addDataSourceProperty("cachePrepStmts", "true"); -config.addDataSourceProperty("prepStmtCacheSize", "250"); -config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048"); -config.addDataSourceProperty("useServerPrepStmts", "true"); - -HikariDataSource ds = new HikariDataSource(config); - -try (Connection conn = ds.getConnection(); - PreparedStatement ps = conn.prepareStatement("SELECT * FROM users WHERE id = ?")) { - ps.setInt(1, 42); - try (ResultSet rs = ps.executeQuery()) { - // process results - } -} -``` - -### Spring Boot Auto-Configuration - -```yaml -# application.yml -spring: - datasource: - url: jdbc:mysql://localhost:3306/mydb?useSSL=true - username: app_user - password: secret - hikari: - maximum-pool-size: 20 - minimum-idle: 5 - 
connection-timeout: 10000 -``` - ---- - -## Go - -### go-sql-driver/mysql - -```go -package main - -import ( - "database/sql" - "fmt" - _ "github.com/go-sql-driver/mysql" -) - -func main() { - // DSN format: user:password@tcp(host:port)/dbname?params - dsn := "app_user:secret@tcp(localhost:3306)/mydb?charset=utf8mb4&parseTime=true&loc=UTC" - db, err := sql.Open("mysql", dsn) - if err != nil { - panic(err) - } - defer db.Close() - - // Connection pool settings. - db.SetMaxOpenConns(25) - db.SetMaxIdleConns(10) - db.SetConnMaxLifetime(5 * time.Minute) - db.SetConnMaxIdleTime(3 * time.Minute) - - // Prepared statement. - var name string - err = db.QueryRow("SELECT name FROM users WHERE id = ?", 42).Scan(&name) - if err != nil { - panic(err) - } - fmt.Println(name) - - // Transaction. - tx, err := db.Begin() - if err != nil { - panic(err) - } - _, err = tx.Exec("INSERT INTO orders (customer_id, total) VALUES (?, ?)", 42, 99.99) - if err != nil { - tx.Rollback() - panic(err) - } - tx.Commit() -} -``` - ---- - -## Connection Pooling - -### ProxySQL - -```sql --- ProxySQL sits between the application and MySQL, providing --- connection multiplexing, query routing, and caching. - --- Add backend servers via ProxySQL admin interface (port 6032). -INSERT INTO mysql_servers (hostgroup_id, hostname, port) VALUES (10, 'mysql-primary', 3306); -INSERT INTO mysql_servers (hostgroup_id, hostname, port) VALUES (20, 'mysql-replica1', 3306); -INSERT INTO mysql_servers (hostgroup_id, hostname, port) VALUES (20, 'mysql-replica2', 3306); -LOAD MYSQL SERVERS TO RUNTIME; - --- Query routing: send reads to replicas, writes to primary. -INSERT INTO mysql_query_rules (rule_id, match_pattern, destination_hostgroup) -VALUES (1, '^SELECT.*FOR UPDATE', 10), -- SELECT FOR UPDATE -> primary - (2, '^SELECT', 20); -- other SELECTs -> replicas -LOAD MYSQL QUERY RULES TO RUNTIME; -``` - -### MySQL Router - -```ini -# MySQL Router configuration for InnoDB Cluster. 
-# Typically bootstrapped automatically: -# mysqlrouter --bootstrap root@primary:3306 --directory /etc/mysqlrouter - -[routing:primary] -bind_address = 0.0.0.0 -bind_port = 6446 -destinations = metadata-cache://mycluster/?role=PRIMARY -routing_strategy = first-available - -[routing:secondary] -bind_address = 0.0.0.0 -bind_port = 6447 -destinations = metadata-cache://mycluster/?role=SECONDARY -routing_strategy = round-robin-with-fallback -``` - -### Pool Sizing Guidelines - -- **Formula:** connections = ((core_count * 2) + effective_spindle_count) -- For SSD: effective_spindle_count ~ 200 (but CPU becomes the bottleneck first) -- Start with 10-20 connections per application instance; measure and adjust -- Monitor `Threads_connected` vs `max_connections` -- Watch for `Threads_running` spikes (indicates contention, not a need for more connections) - ---- - -## SSL/TLS Connections - -```sql --- Check SSL status on the server. -SHOW VARIABLES LIKE '%ssl%'; -SHOW STATUS LIKE 'Ssl_cipher'; - --- Require SSL for a user. -ALTER USER 'app_user'@'%' REQUIRE SSL; - --- Require specific certificate (mutual TLS). -ALTER USER 'app_user'@'%' REQUIRE X509; -``` - -```python -# Python SSL connection. -conn = mysql.connector.connect( - host="db.example.com", - user="app_user", - password="secret", - database="mydb", - ssl_ca="/path/to/ca-cert.pem", - ssl_cert="/path/to/client-cert.pem", - ssl_key="/path/to/client-key.pem", -) -``` - -```javascript -// Node.js SSL connection. -const conn = await mysql.createConnection({ - host: 'db.example.com', - user: 'app_user', - password: 'secret', - database: 'mydb', - ssl: { - ca: fs.readFileSync('/path/to/ca-cert.pem'), - cert: fs.readFileSync('/path/to/client-cert.pem'), - key: fs.readFileSync('/path/to/client-key.pem'), - }, -}); -``` - -### Authentication Plugins - -| Plugin | Default In | Notes | -|---|---|---| -| `caching_sha2_password` | 8.0+ | Default. 
Requires SSL or RSA key exchange on first connect | -| `mysql_native_password` | 5.7 | Legacy. Still works but deprecated in 8.0 | -| `auth_socket` / `unix_socket` | MariaDB | Authenticate via OS user (no password needed) | - -```sql --- Switch a user's auth plugin (e.g., for legacy client compatibility). -ALTER USER 'legacy_app'@'%' IDENTIFIED WITH mysql_native_password BY 'secret'; -``` - ---- - -## Official References - -- Connector/Python: -- mysql2 (Node.js): -- Connector/J (Java): -- go-sql-driver/mysql: -- ProxySQL: -- MySQL Router: diff --git a/plugins/flow/skills/mysql/references/innodb.md b/plugins/flow/skills/mysql/references/innodb.md deleted file mode 100644 index e2241af..0000000 --- a/plugins/flow/skills/mysql/references/innodb.md +++ /dev/null @@ -1,310 +0,0 @@ -# InnoDB Internals - -## Overview - -InnoDB is MySQL's default transactional storage engine. Understanding its architecture is essential for performance tuning, capacity planning, and diagnosing lock contention. This reference covers the internals that directly affect day-to-day development and operations. - ---- - -## Clustered Index Architecture - -In InnoDB, the primary key IS the table. Data rows are stored in primary key order within the clustered index (B+tree leaf pages). - -```text -Clustered Index (B+tree): - Internal pages: [PK pointers] - Leaf pages: [PK | col1 | col2 | col3 | ...] <- actual row data - -Secondary Index: - Leaf pages: [indexed_col | PK] <- stores PK, not row pointer -``` - -**Implications:** - -- A secondary index lookup requires two B+tree traversals: secondary index -> PK, then clustered index -> row. -- Sequential PK inserts (AUTO_INCREMENT) append to the end of the clustered index = fast. -- Random PK inserts (UUIDs) cause page splits throughout the tree = slow, fragmented. -- Covering indexes avoid the second lookup entirely. - -### UUID Primary Key Workaround - -```sql --- If UUIDs are required, use ordered UUIDs (UUID v7 or ORDERED_UUID). 
--- MySQL 8.0 provides UUID_TO_BIN with swap flag to make UUIDs ordered. -CREATE TABLE entities ( - id BINARY(16) PRIMARY KEY DEFAULT (UUID_TO_BIN(UUID(), 1)), - name VARCHAR(200) -); - --- Read back as human-readable UUID. -SELECT BIN_TO_UUID(id, 1) AS uuid, name FROM entities; -``` - ---- - -## Row Formats - -| Format | Default In | Max Row Size | Notes | -|---|---|---|---| -| `DYNAMIC` | 8.0+ | ~8KB inline | Long columns stored off-page. Best general-purpose choice | -| `COMPACT` | 5.0-5.7 | ~8KB inline | Similar to DYNAMIC but different off-page threshold | -| `COMPRESSED` | - | ~8KB inline | Applies zlib to data and index pages. Trades CPU for I/O | -| `REDUNDANT` | Pre-5.0 | ~8KB inline | Legacy format. No advantage over DYNAMIC | - -```sql --- Check current row format. -SELECT TABLE_NAME, ROW_FORMAT - FROM information_schema.TABLES - WHERE TABLE_SCHEMA = 'mydb'; - --- Set row format. -ALTER TABLE large_text_table ROW_FORMAT = DYNAMIC; -``` - -**Off-page storage:** When a row exceeds the page size (16KB default), InnoDB stores long VARCHAR/BLOB/TEXT columns on overflow pages, keeping only a 20-byte pointer inline. - ---- - -## Buffer Pool - -The buffer pool is InnoDB's main memory cache. It holds data pages, index pages, undo pages, the change buffer, and the adaptive hash index. - -### Page Types in the Buffer Pool - -- **Data pages:** Clustered index leaf pages containing actual rows -- **Index pages:** Secondary index pages -- **Undo pages:** Previous row versions for MVCC -- **Change buffer:** Buffered secondary index changes (reduces random I/O) -- **Adaptive hash index:** Automatically built hash index for hot pages - -### LRU Algorithm - -InnoDB uses a modified LRU with a midpoint insertion strategy: - -1. New pages enter at the 3/8 point (midpoint) of the LRU list, not the head. -2. Pages are promoted to the head only after being accessed again after `innodb_old_blocks_time` (default 1000ms). -3. 
This prevents a full table scan from evicting hot pages. - -### Key Configuration - -```sql --- Buffer pool size (primary tuning knob, 70-80% of RAM on dedicated server). -innodb_buffer_pool_size = 12G - --- Multiple instances reduce mutex contention (1 per GB, max 64). -innodb_buffer_pool_instances = 8 - --- Dump/load buffer pool state for fast warm-up after restart. -innodb_buffer_pool_dump_at_shutdown = ON -innodb_buffer_pool_load_at_startup = ON - --- Change buffer: buffers changes to secondary indexes. Disable if --- workload is read-heavy with few secondary index updates. -innodb_change_buffering = all -- none | inserts | deletes | changes | purges | all -``` - -### Monitoring - -```sql --- Buffer pool hit ratio (should be > 99% for OLTP). -SHOW ENGINE INNODB STATUS\G --- Look for: Buffer pool hit rate XXXX / 1000 - --- Detailed stats from information_schema. -SELECT - POOL_ID, POOL_SIZE, FREE_BUFFERS, DATABASE_PAGES, - PAGES_MADE_YOUNG, PAGES_NOT_MADE_YOUNG, - HIT_RATE, YOUNG_MAKE_PER_THOUSAND_GETS - FROM information_schema.INNODB_BUFFER_POOL_STATS; -``` - ---- - -## Redo Log and Doublewrite Buffer - -### Redo Log - -The redo log (WAL) records all changes before they are written to data files, ensuring crash recovery. - -```sql --- Redo log sizing (8.0.30+: automatic sizing by default). -innodb_redo_log_capacity = 2G -- total redo log space (8.0.30+) - --- Pre-8.0.30: sized via file count and file size. -innodb_log_file_size = 512M -- per file -innodb_log_files_in_group = 2 -- number of files (total = 1 GB) - --- Flush behavior (trade durability for performance). -innodb_flush_log_at_trx_commit = 1 -- 1 = flush every commit (safest) - -- 2 = flush every second (fast, risk ~1s data loss) - -- 0 = flush every second, no sync (fastest, risk data loss) -``` - -**Monitoring checkpoint age:** - -```sql --- If checkpoint age approaches redo log capacity, writes stall. 
-SHOW ENGINE INNODB STATUS\G --- Look for: Log sequence number, Last checkpoint at --- Difference = checkpoint age -``` - -### Doublewrite Buffer - -InnoDB writes pages to the doublewrite buffer before writing to their final location. This protects against partial page writes (torn pages) during a crash. - -```sql --- Doublewrite is enabled by default. Disable only on filesystems with --- atomic writes (e.g., ZFS, FusionIO with atomic write support). -innodb_doublewrite = ON -``` - ---- - -## MVCC and Transaction Isolation - -InnoDB uses Multi-Version Concurrency Control (MVCC) to provide consistent reads without blocking writes. - -### How MVCC Works - -1. Each row has hidden columns: `DB_TRX_ID` (last modifying transaction) and `DB_ROLL_PTR` (pointer to undo log). -2. A consistent read constructs a snapshot at the read's start time. -3. Old row versions are stored in the undo log and chained via `DB_ROLL_PTR`. -4. `PURGE` thread removes undo records no longer needed by any active transaction. - -### Isolation Levels - -| Level | Dirty Read | Non-Repeatable Read | Phantom Read | Locking Behavior | -|---|---|---|---|---| -| `READ UNCOMMITTED` | Yes | Yes | Yes | No MVCC snapshot | -| `READ COMMITTED` | No | Yes | Yes | Fresh snapshot per statement | -| `REPEATABLE READ` (default) | No | No | No* | Snapshot at first read; gap locks prevent phantoms | -| `SERIALIZABLE` | No | No | No | All reads are `SELECT ... FOR SHARE` | - -*MySQL's REPEATABLE READ prevents phantoms through gap locking, unlike the SQL standard which allows them. - -```sql --- Set isolation level. -SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; - --- Check current level. -SELECT @@transaction_isolation; -``` - -### Gap Locks and Next-Key Locks - -```sql --- In REPEATABLE READ, InnoDB uses next-key locks (record lock + gap lock) --- on index ranges to prevent phantom rows. 
- --- This locks the gap where status = 'pending' to prevent inserts: -SELECT * FROM orders WHERE status = 'pending' FOR UPDATE; - --- Gap locks can cause unexpected lock waits. Switch to READ COMMITTED --- if phantom protection is not needed (common for web applications). -``` - ---- - -## Deadlock Detection and Handling - -### Automatic Detection - -InnoDB detects deadlocks automatically by default and rolls back the transaction with the fewest undo log records (least work). - -```sql --- Deadlock detection (enabled by default, disable only for very high concurrency --- where detection overhead is measurable — rare). -innodb_deadlock_detect = ON - --- Alternative: use innodb_lock_wait_timeout as a safety net. -innodb_lock_wait_timeout = 50 -- seconds (default) -``` - -### Diagnosing Deadlocks - -```sql --- Show the most recent deadlock. -SHOW ENGINE INNODB STATUS\G --- Look for: LATEST DETECTED DEADLOCK section - --- Enable deadlock logging to error log. -innodb_print_all_deadlocks = ON -``` - -### Deadlock Prevention Patterns - -1. **Access tables in consistent order** across all transactions. -2. **Keep transactions short** — acquire locks, do work, commit immediately. -3. **Use `SELECT ... FOR UPDATE`** early to acquire locks predictably. -4. **Retry on deadlock** — SQLSTATE '40001' / errno 1213 means retry is safe. - -```python -# Python retry pattern for deadlocks. -import time -MAX_RETRIES = 3 - -for attempt in range(MAX_RETRIES): - try: - conn.start_transaction() - cursor.execute("UPDATE accounts SET balance = balance - %s WHERE id = %s", (100, 1)) - cursor.execute("UPDATE accounts SET balance = balance + %s WHERE id = %s", (100, 2)) - conn.commit() - break - except mysql.connector.errors.InternalError as e: - if e.errno == 1213 and attempt < MAX_RETRIES - 1: - conn.rollback() - time.sleep(0.1 * (attempt + 1)) - else: - raise -``` - ---- - -## Table Compression - -### ROW_FORMAT=COMPRESSED - -```sql --- Compress an entire table. 
Reduces disk I/O at the cost of CPU. --- KEY_BLOCK_SIZE determines the compressed page size (1, 2, 4, 8 KB). -CREATE TABLE archive_logs ( - id BIGINT AUTO_INCREMENT PRIMARY KEY, - log_entry TEXT, - created_at TIMESTAMP -) ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; - --- Monitor compression effectiveness. -SELECT * FROM information_schema.INNODB_CMP; --- Look for COMPRESS_OPS_OK / COMPRESS_OPS ratio. If < 90%, try a larger KEY_BLOCK_SIZE. -``` - -### Page Compression (Transparent, 5.7+) - -```sql --- Page compression uses hole punching at the filesystem level. --- Requires a filesystem that supports sparse files (ext4, xfs, btrfs). -CREATE TABLE large_data ( - id BIGINT PRIMARY KEY, - data BLOB -) COMPRESSION='zlib'; -- or 'lz4', 'none' - -ALTER TABLE large_data COMPRESSION='lz4'; -OPTIMIZE TABLE large_data; -- required to actually recompress existing pages -``` - -**When to use compression:** - -- ROW_FORMAT=COMPRESSED: archival tables, read-heavy workloads, I/O-bound systems. -- Page compression: when filesystem supports hole punching; better compression ratio than ROW_FORMAT=COMPRESSED. -- Neither: write-heavy OLTP workloads where CPU is the bottleneck. - ---- - -## Official References - -- InnoDB Architecture: -- InnoDB Locking: -- InnoDB Buffer Pool: -- InnoDB Redo Log: -- InnoDB Compression: diff --git a/plugins/flow/skills/mysql/references/json.md b/plugins/flow/skills/mysql/references/json.md deleted file mode 100644 index 0bf9f0c..0000000 --- a/plugins/flow/skills/mysql/references/json.md +++ /dev/null @@ -1,288 +0,0 @@ -# JSON in MySQL - -## Overview - -MySQL supports a native JSON data type (5.7+) with a rich set of functions for creating, querying, modifying, and indexing JSON documents. This reference covers the full JSON workflow from storage through indexing and aggregation. - ---- - -## JSON Data Type - -```sql --- The JSON column stores validated, binary-encoded JSON. --- Invalid JSON is rejected at INSERT time. 
-CREATE TABLE products ( - id INT AUTO_INCREMENT PRIMARY KEY, - name VARCHAR(200) NOT NULL, - attrs JSON NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -INSERT INTO products (name, attrs) VALUES -('Widget', '{"color": "red", "weight": 1.5, "tags": ["sale", "popular"]}'), -('Gadget', '{"color": "blue", "weight": 3.2, "tags": ["new"], "dimensions": {"w": 10, "h": 5}}'); - --- Maximum JSON document size is limited by max_allowed_packet (default 64MB). -``` - ---- - -## JSON Extraction - -### -> and ->> Operators - -```sql --- -> returns a JSON value (quoted strings, typed numbers). -SELECT attrs->'$.color' FROM products; --- Result: "red" (JSON string, with quotes) - --- ->> returns an unquoted string (equivalent to JSON_UNQUOTE(JSON_EXTRACT(...))). -SELECT attrs->>'$.color' FROM products; --- Result: red (plain text, no quotes) - --- Nested access. -SELECT attrs->>'$.dimensions.w' FROM products; - --- Array element access (zero-indexed). -SELECT attrs->>'$.tags[0]' AS first_tag FROM products; -``` - -### JSON_EXTRACT - -```sql --- JSON_EXTRACT supports multiple paths in one call. -SELECT JSON_EXTRACT(attrs, '$.color', '$.weight') FROM products; --- Result: ["red", 1.5] - --- Wildcard: extract all values at a path. -SELECT JSON_EXTRACT(attrs, '$.tags[*]') FROM products; -``` - ---- - -## JSON Modification - -### JSON_SET, JSON_INSERT, JSON_REPLACE, JSON_REMOVE - -```sql --- JSON_SET: insert or replace (upsert behavior). -UPDATE products - SET attrs = JSON_SET(attrs, '$.color', 'green', '$.rating', 4.5) - WHERE id = 1; - --- JSON_INSERT: insert only (does not overwrite existing keys). -UPDATE products - SET attrs = JSON_INSERT(attrs, '$.color', 'yellow', '$.brand', 'Acme') - WHERE id = 1; --- color stays 'green' (already exists), brand is added. - --- JSON_REPLACE: replace only (does not create new keys). 
-UPDATE products - SET attrs = JSON_REPLACE(attrs, '$.color', 'purple', '$.nonexistent', 'ignored') - WHERE id = 1; --- color becomes 'purple', nonexistent is not created. - --- JSON_REMOVE: delete one or more paths. -UPDATE products - SET attrs = JSON_REMOVE(attrs, '$.rating', '$.tags[0]') - WHERE id = 1; -``` - -### JSON_ARRAY_APPEND / JSON_ARRAY_INSERT - -```sql --- Append to an existing JSON array. -UPDATE products - SET attrs = JSON_ARRAY_APPEND(attrs, '$.tags', 'clearance') - WHERE id = 1; - --- Insert at a specific array position. -UPDATE products - SET attrs = JSON_ARRAY_INSERT(attrs, '$.tags[0]', 'featured') - WHERE id = 1; -``` - ---- - -## JSON_TABLE (8.0+) - -JSON_TABLE shreds a JSON document into relational rows and columns, making it usable in JOINs, WHERE clauses, and aggregations. - -```sql --- Shred a JSON array into rows. -SELECT p.id, p.name, jt.tag - FROM products p, - JSON_TABLE(p.attrs, '$.tags[*]' COLUMNS ( - tag VARCHAR(100) PATH '$' - )) AS jt; - --- Multiple columns with error handling. -SELECT p.id, jt.* - FROM products p, - JSON_TABLE(p.attrs, '$' COLUMNS ( - color VARCHAR(50) PATH '$.color' DEFAULT '"unknown"' ON EMPTY, - weight DECIMAL(10,2) PATH '$.weight' DEFAULT '0' ON ERROR, - tag_count INT PATH '$.tags' ERROR ON ERROR - )) AS jt; - --- Nested path for arrays within objects. -SELECT o.id, items.product, items.qty, items.discount - FROM orders o, - JSON_TABLE(o.line_items, '$[*]' COLUMNS ( - product VARCHAR(100) PATH '$.name', - qty INT PATH '$.quantity', - NESTED PATH '$.discounts[*]' COLUMNS ( - discount DECIMAL(5,2) PATH '$.amount' - ) - )) AS items; -``` - ---- - -## Multi-Valued Indexes (8.0.17+) - -Multi-valued indexes allow indexing individual elements within a JSON array, enabling efficient lookups on array membership. - -```sql --- Create a multi-valued index on a JSON array. 
-CREATE TABLE products_v2 ( - id INT AUTO_INCREMENT PRIMARY KEY, - name VARCHAR(200), - attrs JSON, - INDEX idx_tags ((CAST(attrs->>'$.tags' AS CHAR(50) ARRAY))) -); - --- This query can use the multi-valued index. -SELECT * FROM products_v2 - WHERE JSON_CONTAINS(attrs->'$.tags', '"sale"'); - --- MEMBER OF operator (also uses multi-valued index). -SELECT * FROM products_v2 - WHERE 'sale' MEMBER OF (attrs->'$.tags'); - --- JSON_OVERLAPS: check if any element matches. -SELECT * FROM products_v2 - WHERE JSON_OVERLAPS(attrs->'$.tags', '["sale", "new"]'); -``` - ---- - -## JSON Aggregation - -### JSON_ARRAYAGG - -```sql --- Aggregate column values into a JSON array. -SELECT department_id, - JSON_ARRAYAGG(name) AS team_members - FROM employees - GROUP BY department_id; - --- With ordering (wrap in a subquery since JSON_ARRAYAGG has no ORDER BY). -SELECT department_id, - JSON_ARRAYAGG(name) AS team_members - FROM (SELECT department_id, name FROM employees ORDER BY name) sub - GROUP BY department_id; -``` - -### JSON_OBJECTAGG - -```sql --- Aggregate key-value pairs into a JSON object. -SELECT JSON_OBJECTAGG(setting_key, setting_value) AS config - FROM app_settings - WHERE app_id = 1; - --- Result: {"theme": "dark", "language": "en", "timezone": "UTC"} -``` - -### Building Complex JSON - -```sql --- Construct nested JSON objects using JSON_OBJECT and JSON_ARRAYAGG. -SELECT JSON_OBJECT( - 'department', d.name, - 'employee_count', COUNT(e.id), - 'employees', JSON_ARRAYAGG( - JSON_OBJECT('id', e.id, 'name', e.name, 'salary', e.salary) - ) -) AS dept_json - FROM departments d - JOIN employees e ON e.department_id = d.id - GROUP BY d.id, d.name; -``` - ---- - -## JSON Schema Validation (8.0.17+) - -```sql --- Validate JSON documents against a schema using CHECK constraints. 
-CREATE TABLE events ( - id INT AUTO_INCREMENT PRIMARY KEY, - data JSON NOT NULL, - CONSTRAINT chk_event_schema CHECK ( - JSON_SCHEMA_VALID('{ - "type": "object", - "required": ["event_type", "timestamp"], - "properties": { - "event_type": {"type": "string", "enum": ["click", "view", "purchase"]}, - "timestamp": {"type": "string", "format": "date-time"}, - "user_id": {"type": "integer"} - } - }', data) - ) -); - --- Valid insert. -INSERT INTO events (data) VALUES ('{"event_type": "click", "timestamp": "2026-03-26T10:00:00Z", "user_id": 42}'); - --- Invalid insert (rejected by CHECK constraint). -INSERT INTO events (data) VALUES ('{"event_type": "invalid"}'); --- ERROR: Check constraint 'chk_event_schema' is violated. - --- Validate without a constraint (returns 1 or 0). -SELECT JSON_SCHEMA_VALID('{"type": "object"}', '{"key": "value"}') AS is_valid; - --- Get detailed validation errors. -SELECT JSON_SCHEMA_VALIDATION_REPORT('{"type": "integer"}', '"not_an_int"') AS report; -``` - ---- - -## Generated Columns for Indexing JSON Values - -When you need to index a specific JSON path, extract it into a generated virtual column and index that column. - -```sql --- Virtual generated column: computed on read, zero storage overhead. -ALTER TABLE products - ADD color VARCHAR(50) AS (attrs->>'$.color') VIRTUAL; - -CREATE INDEX idx_products_color ON products (color); - --- Now this query uses the index: -SELECT * FROM products WHERE color = 'red'; - --- Stored generated column: computed on write, persisted to disk. --- Required if the expression is non-deterministic. -ALTER TABLE products - ADD weight DECIMAL(10,2) AS (CAST(attrs->>'$.weight' AS DECIMAL(10,2))) STORED; - -CREATE INDEX idx_products_weight ON products (weight); -``` - -**When to use generated columns vs multi-valued indexes:** - -- Generated columns: for scalar JSON values (strings, numbers) that you filter/sort on frequently. 
-- Multi-valued indexes: for JSON arrays where you need `MEMBER OF` or `JSON_CONTAINS` queries. - ---- - -## Official References - -- JSON Data Type: -- JSON Functions: -- JSON_TABLE: -- Multi-Valued Indexes: -- JSON Schema Validation: diff --git a/plugins/flow/skills/mysql/references/mysql_cli.md b/plugins/flow/skills/mysql/references/mysql_cli.md deleted file mode 100644 index 35534f0..0000000 --- a/plugins/flow/skills/mysql/references/mysql_cli.md +++ /dev/null @@ -1,408 +0,0 @@ -# MySQL CLI & Tools - -## Overview - -This reference covers the mysql command-line client, modern alternatives, MySQL Shell, and essential third-party tools for schema migrations and query analysis. - ---- - -## mysql Client - -### Connecting - -```bash -# Basic connection. -mysql -u app_user -p -h localhost -P 3306 mydb - -# With explicit password (avoid in scripts; use option files instead). -mysql -u app_user -psecret mydb - -# Using option files (~/.my.cnf). -# [client] -# user = app_user -# password = secret -# host = localhost -# database = mydb -mysql # reads from ~/.my.cnf - -# Connect via Unix socket. -mysql -u root --socket=/var/run/mysqld/mysqld.sock - -# Connect with SSL. -mysql -u app_user -p --ssl-ca=/path/to/ca.pem --ssl-mode=REQUIRED mydb - -# Execute a single command and exit. -mysql -u root -p -e "SHOW DATABASES;" - -# Execute from a file. 
-mysql -u root -p mydb < schema.sql -``` - -### Useful Options - -| Flag | Description | -|---|---| -| `-A` / `--no-auto-rehash` | Skip table/column name completion (faster startup on large schemas) | -| `-B` / `--batch` | Tab-separated output, no borders (for scripting) | -| `-N` / `--skip-column-names` | Omit column headers in output | -| `-t` / `--table` | Force table-format output (even in batch mode) | -| `--safe-updates` | Prevent UPDATE/DELETE without WHERE clause | -| `--connect-timeout=N` | Connection timeout in seconds | -| `--max-allowed-packet=N` | Max packet size (increase for large INSERTs) | - -### Interactive Commands - -```sql --- Vertical output (one column per line, great for wide rows). -SELECT * FROM orders WHERE id = 1\G - --- Show warnings after a statement. -SHOW WARNINGS; - --- Switch database. -USE mydb; - --- Source a SQL file. -SOURCE /path/to/script.sql; -\. /path/to/script.sql - --- Log session output to a file. -TEE /tmp/session.log; --- ... run queries ... -NOTEE; - --- Execute a shell command without leaving mysql. -\! ls -la /var/lib/mysql - --- Show current connection info. -\s -STATUS; - --- Clear the current input buffer. -\c - --- Enable/disable query timing. --- Timing is shown by default; toggle with: -\R mysql> -- change prompt -``` - -### Pager and Output Formatting - -```bash -# Use less as pager for long output. -mysql> PAGER less -SFX; - -# Pipe output through a command. -mysql> PAGER grep -i error; -SELECT * FROM error_log; -mysql> NOPAGER; - -# Save query results to a file. 
-mysql> TEE /tmp/results.txt; -SELECT * FROM large_table; -mysql> NOTEE; -``` - -### .my.cnf for Convenience - -```ini -# ~/.my.cnf (chmod 600) -[client] -user = app_user -password = secret -host = localhost -default-character-set = utf8mb4 - -[mysql] -auto-rehash = FALSE -prompt = "\\u@\\h [\\d]> " -pager = "less -SFX" -safe-updates -``` - ---- - -## mycli — Modern MySQL Client - -mycli is a drop-in replacement for the mysql client with auto-completion, syntax highlighting, and smart suggestions. - -```bash -# Install. -pip install mycli - -# Connect (same syntax as mysql). -mycli -u app_user -p -h localhost mydb - -# Features: -# - Context-aware auto-completion (tables, columns, keywords) -# - Syntax highlighting -# - Multi-line mode -# - Vi/Emacs key bindings -# - Favorites: save and recall named queries - -# Configuration: ~/.myclirc -``` - ---- - -## MySQL Shell (mysqlsh) - -MySQL Shell is Oracle's advanced client supporting SQL, JavaScript, and Python modes, plus administrative utilities. - -### Connecting - -```bash -# SQL mode (default). -mysqlsh root@localhost:3306 --sql - -# JavaScript mode. -mysqlsh root@localhost --js - -# Python mode. -mysqlsh root@localhost --py - -# URI format. -mysqlsh mysql://root@localhost:3306/mydb - -# Switch modes inside the shell. -\sql -\js -\py -``` - -### SQL Mode - -```sql --- Standard SQL works as expected. -mysqlsh> SELECT * FROM users LIMIT 5; - --- Vertical output. -mysqlsh> SELECT * FROM users WHERE id = 1\G - --- Run a SQL file. -mysqlsh> \source /path/to/script.sql -``` - -### JavaScript/Python Mode - -```javascript -// JavaScript mode. -var session = mysql.getClassicSession('root@localhost:3306'); -var result = session.runSql('SELECT * FROM users LIMIT 5'); -var row = result.fetchOne(); -print(row); - -// X DevAPI (document store). 
-var db = session.getSchema('mydb'); -var collection = db.createCollection('test_docs'); -collection.add({name: 'Alice', age: 30}).execute(); -collection.find('age > 25').execute(); -``` - -### Utility Commands - -```bash -# Check server readiness for upgrade. -mysqlsh root@localhost -- util checkForServerUpgrade - -# Dump and load (see admin.md for full details). -mysqlsh root@localhost -- util dumpInstance /backup/full --threads=8 -mysqlsh root@localhost -- util loadDump /backup/full --threads=8 - -# Import JSON documents into a collection or table. -mysqlsh root@localhost -- util importJson /data/docs.json --schema=mydb --collection=docs - -# Import CSV/TSV into a table. -mysqlsh root@localhost -- util importTable /data/users.csv \ - --schema=mydb --table=users --columns='id,name,email' --dialect=csv -``` - ---- - -## Percona Toolkit - -### pt-query-digest - -```bash -# Analyze slow query log. -pt-query-digest /var/log/mysql/slow.log - -# Analyze from PROCESSLIST. -pt-query-digest --processlist h=localhost,u=root,p=secret - -# Filter and sort. -pt-query-digest --order-by Query_time:sum --limit 20 /var/log/mysql/slow.log - -# Output as JSON. -pt-query-digest --output json /var/log/mysql/slow.log -``` - -### pt-online-schema-change - -```bash -# Alter a large table without blocking writes. -# Creates a shadow table, copies data in chunks, swaps via rename. -pt-online-schema-change \ - --alter "ADD COLUMN phone VARCHAR(20), ADD INDEX idx_phone (phone)" \ - --execute \ - D=mydb,t=users,h=localhost,u=root,p=secret - -# Dry run first. -pt-online-schema-change \ - --alter "ADD COLUMN phone VARCHAR(20)" \ - --dry-run \ - D=mydb,t=users,h=localhost,u=root,p=secret - -# Control chunk size and sleep time. 
-pt-online-schema-change \ - --alter "DROP COLUMN legacy_field" \ - --chunk-size 1000 \ - --sleep 0.5 \ - --execute \ - D=mydb,t=orders,h=localhost,u=root,p=secret -``` - -### Other Percona Toolkit Utilities - -```bash -# pt-table-checksum: verify replica data consistency. -pt-table-checksum --replicate=percona.checksums h=primary,u=root,p=secret - -# pt-table-sync: repair data inconsistencies between source and replica. -pt-table-sync --execute --replicate=percona.checksums h=primary,u=root,p=secret - -# pt-kill: kill long-running queries. -pt-kill --busy-time 60 --kill --print h=localhost,u=root,p=secret - -# pt-stalk: collect diagnostics when a condition is met. -pt-stalk --function status --variable Threads_running --threshold 50 -- \ - --collect --dest /tmp/pt-stalk - -# pt-archiver: archive old rows from a table. -pt-archiver --source h=localhost,D=mydb,t=orders \ - --dest h=archive-host,D=mydb,t=orders_archive \ - --where "created_at < '2025-01-01'" \ - --limit 1000 --commit-each -``` - ---- - -## gh-ost: GitHub's Online Schema Migration - -gh-ost uses the binary log to capture changes (instead of triggers like pt-online-schema-change), providing a more controllable and pausable migration. - -```bash -# Basic usage: add a column to a large table. -gh-ost \ - --host=localhost \ - --user=root \ - --password=secret \ - --database=mydb \ - --table=users \ - --alter="ADD COLUMN phone VARCHAR(20)" \ - --execute - -# Throttle based on replication lag. -gh-ost \ - --host=localhost \ - --user=root \ - --password=secret \ - --database=mydb \ - --table=orders \ - --alter="ADD INDEX idx_status (status)" \ - --max-lag-millis=1500 \ - --throttle-control-replicas="replica1:3306" \ - --execute - -# Test mode (no-op, validates the migration plan). -gh-ost \ - --host=localhost \ - --user=root \ - --password=secret \ - --database=mydb \ - --table=orders \ - --alter="ADD COLUMN notes TEXT" \ - --test-on-replica \ - --execute - -# Interactive control during migration. 
-# gh-ost creates a Unix socket; send commands to it: -echo "throttle" | nc -U /tmp/gh-ost.mydb.orders.sock -echo "no-throttle" | nc -U /tmp/gh-ost.mydb.orders.sock -echo "status" | nc -U /tmp/gh-ost.mydb.orders.sock -``` - ---- - -## mysqladmin, mysqlcheck, mysqlimport - -### mysqladmin - -```bash -# Check if server is alive. -mysqladmin -u root -p ping - -# Server status summary. -mysqladmin -u root -p status -mysqladmin -u root -p extended-status # SHOW GLOBAL STATUS equivalent - -# Process list. -mysqladmin -u root -p processlist - -# Kill a connection. -mysqladmin -u root -p kill 12345 - -# Flush operations. -mysqladmin -u root -p flush-logs # rotate binary/error logs -mysqladmin -u root -p flush-privileges # reload grant tables - -# Create/drop database. -mysqladmin -u root -p create testdb -mysqladmin -u root -p drop testdb - -# Shut down the server. -mysqladmin -u root -p shutdown -``` - -### mysqlcheck - -```bash -# Check all tables in a database. -mysqlcheck -u root -p mydb - -# Analyze all tables (update statistics). -mysqlcheck -u root -p --analyze mydb - -# Optimize all tables (reclaim space). -mysqlcheck -u root -p --optimize mydb - -# Check all databases. -mysqlcheck -u root -p --all-databases -``` - -### mysqlimport - -```bash -# Bulk load data from a file (wraps LOAD DATA INFILE). -# File name must match table name (users.txt -> users table). -mysqlimport -u root -p --local --fields-terminated-by=',' \ - --lines-terminated-by='\n' mydb /path/to/users.txt - -# With column specification. -mysqlimport -u root -p --local --columns='id,name,email' \ - --fields-terminated-by=',' mydb /path/to/users.csv - -# Replace existing rows on duplicate key. 
-mysqlimport -u root -p --local --replace mydb /path/to/users.txt -``` - ---- - -## Official References - -- mysql Client: -- MySQL Shell: -- mycli: -- Percona Toolkit: -- gh-ost: -- mysqladmin: diff --git a/plugins/flow/skills/mysql/references/performance.md b/plugins/flow/skills/mysql/references/performance.md deleted file mode 100644 index ffdc474..0000000 --- a/plugins/flow/skills/mysql/references/performance.md +++ /dev/null @@ -1,329 +0,0 @@ -# Performance Tuning - -## Overview - -This reference covers MySQL query analysis, indexing strategies, buffer pool tuning, slow query diagnosis, and optimizer behavior. Target audience: developers and DBAs optimizing MySQL 8.0+ workloads. - ---- - -## EXPLAIN / EXPLAIN ANALYZE - -### Basic EXPLAIN - -```sql --- EXPLAIN shows the query execution plan without running the query. -EXPLAIN SELECT c.name, COUNT(o.id) AS order_count - FROM customers c - JOIN orders o ON o.customer_id = c.id - WHERE c.status = 'active' - GROUP BY c.name; -``` - -### Key Columns in EXPLAIN Output - -| Column | What it means | -|----------------|------------------------------------------------------------------| -| `type` | Join type. Best to worst: `system` > `const` > `eq_ref` > `ref` > `range` > `index` > `ALL` | -| `possible_keys`| Indexes the optimizer considered | -| `key` | Index actually chosen | -| `key_len` | Bytes of the index used (shorter = fewer columns used) | -| `rows` | Estimated rows to examine | -| `filtered` | Percentage of rows remaining after WHERE conditions | -| `Extra` | Important flags: `Using index` (covering), `Using filesort`, `Using temporary`, `Using where` | - -### EXPLAIN ANALYZE (8.0.18+) - -```sql --- EXPLAIN ANALYZE actually executes the query and shows real timing. -EXPLAIN ANALYZE -SELECT c.name, COUNT(o.id) - FROM customers c - JOIN orders o ON o.customer_id = c.id - GROUP BY c.name; -``` - -Output shows estimated vs actual rows and time per iterator. 
Look for large discrepancies between estimated and actual rows, which indicate stale statistics. - -### EXPLAIN FORMAT=TREE / FORMAT=JSON - -```sql --- Tree format shows the iterator-based execution plan (8.0.16+). -EXPLAIN FORMAT=TREE SELECT ...; - --- JSON format includes cost estimates and detailed optimizer info. -EXPLAIN FORMAT=JSON SELECT ...; -``` - ---- - -## Index Strategy - -### B-tree Indexes (Default) - -```sql --- Single-column index. -CREATE INDEX idx_orders_customer ON orders (customer_id); - --- Composite index: leftmost prefix rule applies. --- This index supports queries filtering on (status), (status, created_at), --- or (status, created_at, total), but NOT (created_at) alone. -CREATE INDEX idx_orders_composite ON orders (status, created_at, total); - --- Prefix index for long strings (saves space, reduces selectivity). -CREATE INDEX idx_users_email_prefix ON users (email(20)); -``` - -### Covering Indexes - -```sql --- A covering index includes all columns needed by the query. --- EXPLAIN shows "Using index" in Extra column — no table lookup needed. -CREATE INDEX idx_orders_covering ON orders (customer_id, status, total); - --- This query is fully satisfied by the index: -SELECT customer_id, status, total FROM orders WHERE customer_id = 42; -``` - -### Fulltext Indexes - -```sql --- Fulltext indexes support natural language and boolean mode search. -CREATE FULLTEXT INDEX idx_articles_ft ON articles (title, body); - -SELECT id, title, MATCH(title, body) AGAINST('mysql performance' IN NATURAL LANGUAGE MODE) AS relevance - FROM articles - WHERE MATCH(title, body) AGAINST('mysql performance' IN NATURAL LANGUAGE MODE); - --- Boolean mode for precise control. -SELECT * FROM articles - WHERE MATCH(title, body) AGAINST('+mysql -oracle' IN BOOLEAN MODE); -``` - -### Invisible Indexes (8.0+) - -```sql --- Make an index invisible to the optimizer without dropping it. --- Use to test performance impact before removing an index. 
-ALTER TABLE orders ALTER INDEX idx_orders_status INVISIBLE; - --- Re-enable it. -ALTER TABLE orders ALTER INDEX idx_orders_status VISIBLE; - --- Optimizer can still be forced to use invisible indexes: -SET SESSION optimizer_switch = 'use_invisible_indexes=on'; -``` - -### Descending Indexes (8.0+) - -```sql --- Before 8.0, DESC was parsed but ignored. Now it creates a true descending index. --- Useful when queries sort some columns ASC and others DESC. -CREATE INDEX idx_scores ON leaderboard (game_id ASC, score DESC); -``` - -### Spatial Indexes - -```sql --- Spatial index on POINT/GEOMETRY columns (requires SRID). -ALTER TABLE locations ADD SPATIAL INDEX idx_locations_coords (coords); - -SELECT name, ST_Distance_Sphere(coords, ST_SRID(POINT(-73.99, 40.73), 4326)) AS distance_m - FROM locations - WHERE ST_Within(coords, ST_Buffer(ST_SRID(POINT(-73.99, 40.73), 4326), 0.01)) - ORDER BY distance_m - LIMIT 10; -``` - ---- - -## Slow Query Log - -### Configuration - -```sql --- Enable slow query log and set threshold. -SET GLOBAL slow_query_log = ON; -SET GLOBAL long_query_time = 1; -- seconds (default 10) -SET GLOBAL log_queries_not_using_indexes = ON; -SET GLOBAL slow_query_log_file = '/var/log/mysql/slow.log'; - --- Verify settings. -SHOW VARIABLES LIKE 'slow_query%'; -SHOW VARIABLES LIKE 'long_query_time'; -``` - -### Analysis with pt-query-digest - -```bash -# Summarize slow log: top queries by total time. -pt-query-digest /var/log/mysql/slow.log - -# Filter by time range. -pt-query-digest --since '2026-03-25 00:00:00' --until '2026-03-26 00:00:00' /var/log/mysql/slow.log - -# Analyze only queries touching a specific table. -pt-query-digest --filter '$event->{arg} =~ m/orders/' /var/log/mysql/slow.log -``` - ---- - -## InnoDB Buffer Pool - -### Sizing - -```sql --- Buffer pool should hold the working set. Start at 70-80% of available RAM --- on a dedicated MySQL server. 
-SET GLOBAL innodb_buffer_pool_size = 12884901888; -- 12 GB - --- Online resizing (8.0+): takes effect in chunks. --- Check progress: -SHOW STATUS LIKE 'Innodb_buffer_pool_resize_status'; -``` - -### Monitoring Hit Ratio - -```sql --- A hit ratio below 99% on an OLTP workload usually means the buffer pool is too small. -SELECT - (1 - (Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests)) * 100 AS hit_ratio_pct -FROM ( - SELECT - VARIABLE_VALUE AS Innodb_buffer_pool_reads - FROM performance_schema.global_status - WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads' -) a, -( - SELECT - VARIABLE_VALUE AS Innodb_buffer_pool_read_requests - FROM performance_schema.global_status - WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests' -) b; -``` - -### Multiple Buffer Pool Instances - -```sql --- Multiple instances reduce contention on the buffer pool mutex. --- Recommended: 1 instance per GB of buffer pool, up to 64. --- Only effective when buffer pool >= 1 GB. -SET GLOBAL innodb_buffer_pool_instances = 8; -``` - ---- - -## Query Optimizer - -### Optimizer Hints - -```sql --- Hint the optimizer to use or ignore specific indexes. -SELECT /*+ INDEX(orders idx_orders_customer) */ * - FROM orders - WHERE customer_id = 42; - -SELECT /*+ NO_INDEX(orders idx_orders_status) */ * - FROM orders - WHERE status = 'pending'; - --- Join order hints. -SELECT /*+ JOIN_ORDER(c, o, p) */ c.name, o.total, p.name - FROM customers c - JOIN orders o ON o.customer_id = c.id - JOIN products p ON p.id = o.product_id; - --- Other useful hints. -SELECT /*+ MAX_EXECUTION_TIME(5000) */ * FROM large_table; -- 5 second timeout -SELECT /*+ SET_VAR(sort_buffer_size = 16777216) */ * FROM big_sort ORDER BY col; -``` - -### Index Merge - -```sql --- MySQL can merge multiple indexes on the same table. --- EXPLAIN shows type=index_merge when this happens. --- Sometimes produces suboptimal plans; disable selectively if needed. 
-SELECT /*+ NO_INDEX_MERGE(orders) */ * - FROM orders - WHERE status = 'pending' OR customer_id = 42; -``` - -### Derived Table Optimization - -```sql --- MySQL 8.0 can merge derived tables into the outer query (derived_merge). --- If the optimizer incorrectly merges, disable it: -SELECT /*+ NO_MERGE(sub) */ * - FROM (SELECT customer_id, SUM(total) AS total FROM orders GROUP BY customer_id) sub - WHERE sub.total > 1000; -``` - ---- - -## Performance Schema - -### Key Tables - -```sql --- Top queries by total execution time. -SELECT DIGEST_TEXT, COUNT_STAR, SUM_TIMER_WAIT/1e12 AS total_sec, - AVG_TIMER_WAIT/1e12 AS avg_sec, SUM_ROWS_EXAMINED - FROM performance_schema.events_statements_summary_by_digest - ORDER BY SUM_TIMER_WAIT DESC - LIMIT 20; - --- Current running queries. -SELECT THREAD_ID, SQL_TEXT, TIMER_WAIT/1e12 AS elapsed_sec - FROM performance_schema.events_statements_current - WHERE SQL_TEXT IS NOT NULL; - --- Table I/O: which tables cause the most disk reads. -SELECT OBJECT_SCHEMA, OBJECT_NAME, - COUNT_READ, COUNT_WRITE, - SUM_TIMER_READ/1e12 AS read_sec, - SUM_TIMER_WRITE/1e12 AS write_sec - FROM performance_schema.table_io_waits_summary_by_table - ORDER BY SUM_TIMER_READ DESC - LIMIT 20; - --- Index usage: identify unused indexes. -SELECT OBJECT_SCHEMA, OBJECT_NAME, INDEX_NAME, COUNT_STAR - FROM performance_schema.table_io_waits_summary_by_index_usage - WHERE INDEX_NAME IS NOT NULL AND COUNT_STAR = 0 - AND OBJECT_SCHEMA NOT IN ('mysql', 'performance_schema', 'sys'); -``` - -### sys Schema Shortcuts - -```sql --- The sys schema provides human-readable views over Performance Schema. 
-SELECT * FROM sys.statements_with_full_table_scans LIMIT 10; -SELECT * FROM sys.schema_unused_indexes; -SELECT * FROM sys.schema_index_statistics ORDER BY rows_selected DESC LIMIT 20; -SELECT * FROM sys.innodb_buffer_stats_by_table ORDER BY allocated DESC LIMIT 20; -SELECT * FROM sys.host_summary; -``` - ---- - -## Common Anti-Patterns - -| Anti-Pattern | Problem | Fix | -|---|---|---| -| `SELECT *` | Reads unnecessary columns, prevents covering indexes | List only needed columns | -| `WHERE YEAR(created_at) = 2026` | Function on column prevents index use | `WHERE created_at >= '2026-01-01' AND created_at < '2027-01-01'` | -| Implicit type conversion | `WHERE phone = 5551234` on VARCHAR column: full scan | Match types: `WHERE phone = '5551234'` | -| `LIKE '%search%'` | Leading wildcard: full scan | Use fulltext index or external search engine | -| `ORDER BY RAND()` | Scans entire table, sorts in memory | Pre-select random IDs, then fetch | -| Missing LIMIT on unbounded queries | OOM on large tables | Always LIMIT unless you need all rows | -| Too many indexes | Slows writes, wastes memory | Audit with `sys.schema_unused_indexes` | - ---- - -## Official References - -- EXPLAIN Output Format: -- Optimization: -- InnoDB Buffer Pool: -- Performance Schema: -- sys Schema: diff --git a/plugins/flow/skills/mysql/references/replication.md b/plugins/flow/skills/mysql/references/replication.md deleted file mode 100644 index 5c6a2ce..0000000 --- a/plugins/flow/skills/mysql/references/replication.md +++ /dev/null @@ -1,372 +0,0 @@ -# Replication & High Availability - -## Overview - -This reference covers MySQL replication topologies from basic source-replica setups through InnoDB Cluster and ClusterSet, including monitoring, failover, and read/write splitting. 
- ---- - -## Binary Log Replication - -### Source-Replica Setup (GTID Mode) - -GTID (Global Transaction Identifiers) uniquely identify every transaction across all servers, making failover and re-pointing replicas straightforward. - -**On the source (my.cnf):** - -```ini -[mysqld] -server-id = 1 -log-bin = mysql-bin -binlog_format = ROW -gtid_mode = ON -enforce_gtid_consistency = ON -``` - -**Create a replication user:** - -```sql -CREATE USER 'repl_user'@'10.0.%' IDENTIFIED BY 'ReplicaP@ss!'; -GRANT REPLICATION SLAVE ON *.* TO 'repl_user'@'10.0.%'; -``` - -**On the replica (my.cnf):** - -```ini -[mysqld] -server-id = 2 -relay-log = relay-bin -gtid_mode = ON -enforce_gtid_consistency = ON -read_only = ON -super_read_only = ON -``` - -**Start replication:** - -```sql --- On the replica. -CHANGE REPLICATION SOURCE TO - SOURCE_HOST = '10.0.1.10', - SOURCE_PORT = 3306, - SOURCE_USER = 'repl_user', - SOURCE_PASSWORD = 'ReplicaP@ss!', - SOURCE_AUTO_POSITION = 1; -- GTID-based positioning - -START REPLICA; -SHOW REPLICA STATUS\G -``` - -### Position-Based Replication (Legacy) - -```sql --- Without GTID, specify binary log file and position. -CHANGE REPLICATION SOURCE TO - SOURCE_HOST = '10.0.1.10', - SOURCE_USER = 'repl_user', - SOURCE_PASSWORD = 'ReplicaP@ss!', - SOURCE_LOG_FILE = 'mysql-bin.000042', - SOURCE_LOG_POS = 12345; - -START REPLICA; -``` - -### Semi-Synchronous Replication - -Semi-sync ensures at least one replica acknowledges each transaction before the source returns to the client. Provides stronger durability than async replication. - -```sql --- On the source. -INSTALL PLUGIN rpl_semi_sync_source SONAME 'semisync_source.so'; -SET GLOBAL rpl_semi_sync_source_enabled = ON; -SET GLOBAL rpl_semi_sync_source_wait_for_replica_count = 1; -SET GLOBAL rpl_semi_sync_source_timeout = 5000; -- ms, fallback to async if exceeded - --- On the replica. 
-INSTALL PLUGIN rpl_semi_sync_replica SONAME 'semisync_replica.so'; -SET GLOBAL rpl_semi_sync_replica_enabled = ON; -STOP REPLICA; START REPLICA; - --- Verify semi-sync is active. -SHOW STATUS LIKE 'Rpl_semi_sync%'; -``` - -### Multi-Source Replication - -```sql --- A single replica can replicate from multiple sources (channels). -CHANGE REPLICATION SOURCE TO - SOURCE_HOST = '10.0.1.10', - SOURCE_USER = 'repl_user', - SOURCE_PASSWORD = 'secret', - SOURCE_AUTO_POSITION = 1 - FOR CHANNEL 'source_a'; - -CHANGE REPLICATION SOURCE TO - SOURCE_HOST = '10.0.2.10', - SOURCE_USER = 'repl_user', - SOURCE_PASSWORD = 'secret', - SOURCE_AUTO_POSITION = 1 - FOR CHANNEL 'source_b'; - -START REPLICA FOR CHANNEL 'source_a'; -START REPLICA FOR CHANNEL 'source_b'; - --- Check status per channel. -SHOW REPLICA STATUS FOR CHANNEL 'source_a'\G -``` - ---- - -## Group Replication - -Group Replication provides built-in distributed consensus (Paxos-based) for automatic failover and conflict detection. - -### Single-Primary Mode (Recommended) - -One server accepts writes; all others are read-only. Automatic primary election on failure. - -```ini -# my.cnf on each member. -[mysqld] -server-id = 1 # unique per member -gtid_mode = ON -enforce_gtid_consistency = ON -binlog_checksum = NONE -log_slave_updates = ON -binlog_format = ROW - -plugin_load_add = 'group_replication.so' -group_replication_group_name = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' -group_replication_start_on_boot = OFF -group_replication_local_address = '10.0.1.10:33061' -group_replication_group_seeds = '10.0.1.10:33061,10.0.1.11:33061,10.0.1.12:33061' -group_replication_single_primary_mode = ON -``` - -```sql --- Bootstrap the first member. -SET GLOBAL group_replication_bootstrap_group = ON; -START GROUP_REPLICATION; -SET GLOBAL group_replication_bootstrap_group = OFF; - --- Join additional members. -START GROUP_REPLICATION; - --- Check group membership. 
-SELECT MEMBER_ID, MEMBER_HOST, MEMBER_STATE, MEMBER_ROLE - FROM performance_schema.replication_group_members; -``` - -### Multi-Primary Mode - -All members accept writes. Conflict detection rolls back conflicting transactions. - -```sql --- Switch to multi-primary. -SELECT group_replication_switch_to_multi_primary_mode(); - --- Switch back to single-primary. -SELECT group_replication_switch_to_single_primary_mode('member-uuid'); -``` - -**Multi-primary limitations:** - -- No foreign keys with CASCADE (detected and blocked). -- DDL and DML on the same object can cause conflicts. -- Serialization of DDL across the group increases latency. - ---- - -## InnoDB Cluster - -InnoDB Cluster combines Group Replication, MySQL Shell, and MySQL Router for an integrated HA solution. - -### Provisioning with MySQL Shell - -```javascript -// Connect to MySQL Shell. -// mysqlsh root@primary:3306 - -// Create the cluster (configures Group Replication automatically). -var cluster = dba.createCluster('myCluster'); - -// Add instances (MySQL Shell clones data automatically if needed). -cluster.addInstance('root@replica1:3306'); -cluster.addInstance('root@replica2:3306'); - -// Check cluster status. -cluster.status(); - -// Bootstrap MySQL Router for automatic connection routing. -// mysqlrouter --bootstrap root@primary:3306 --directory /etc/mysqlrouter -``` - -### Cluster Operations - -```javascript -// Remove an instance. -cluster.removeInstance('root@replica2:3306'); - -// Rejoin an instance after failure. -cluster.rejoinInstance('root@replica2:3306'); - -// Force quorum when majority is lost (dangerous, manual intervention). -cluster.forceQuorumUsingPartitionOf('root@primary:3306'); - -// Switchover (planned primary change). -cluster.setPrimaryInstance('root@replica1:3306'); - -// Dissolve the cluster. 
-cluster.dissolve(); -``` - ---- - -## InnoDB ClusterSet - -ClusterSet provides disaster recovery across data centers by linking an InnoDB Cluster (primary) with replica clusters in other regions. - -```javascript -// Create a ClusterSet from an existing InnoDB Cluster. -var cs = cluster.createClusterSet('myClusterSet'); - -// Create a replica cluster in another data center. -cs.createReplicaCluster('root@dc2-primary:3306', 'dc2Cluster'); - -// Check ClusterSet status. -cs.status(); - -// Emergency failover (when primary DC is down). -cs.forcePrimaryCluster('dc2Cluster'); - -// Controlled switchover (planned). -cs.setPrimaryCluster('dc2Cluster'); -``` - ---- - -## InnoDB ReplicaSet - -ReplicaSet manages async replication (no Group Replication) via MySQL Shell. Simpler than InnoDB Cluster, suitable when automatic failover is not required. - -```javascript -// Create a ReplicaSet. -var rs = dba.createReplicaSet('myReplicaSet'); - -// Add replicas. -rs.addInstance('root@replica1:3306'); - -// Check status. -rs.status(); - -// Manual failover. -rs.setPrimaryInstance('root@replica1:3306'); - -// Force failover when primary is unreachable. -rs.forcePrimaryInstance('root@replica1:3306'); -``` - ---- - -## Monitoring Replication - -### Key Metrics - -```sql --- Basic replication status. -SHOW REPLICA STATUS\G -``` - -| Field | What to check | -|---|---| -| `Replica_IO_Running` | Must be `Yes` | -| `Replica_SQL_Running` | Must be `Yes` | -| `Seconds_Behind_Source` | Replication lag in seconds (0 = caught up) | -| `Last_IO_Error` / `Last_SQL_Error` | Error details when replication breaks | -| `Retrieved_Gtid_Set` | GTIDs received from source | -| `Executed_Gtid_Set` | GTIDs applied locally | - -### Monitoring Replication Lag - -```sql --- Performance Schema (more accurate than Seconds_Behind_Source). 
-SELECT CHANNEL_NAME, - LAST_APPLIED_TRANSACTION_END_APPLY_TIMESTAMP, - APPLYING_TRANSACTION_ORIGINAL_COMMIT_TIMESTAMP, - TIMESTAMPDIFF(SECOND, - APPLYING_TRANSACTION_ORIGINAL_COMMIT_TIMESTAMP, - NOW()) AS lag_seconds - FROM performance_schema.replication_applier_status_by_worker - ORDER BY lag_seconds DESC - LIMIT 5; - --- Heartbeat-based lag monitoring. --- On source: INSERT INTO heartbeat (id, ts) VALUES (1, NOW()) ON DUPLICATE KEY UPDATE ts = NOW(); --- On replica: SELECT TIMESTAMPDIFF(SECOND, ts, NOW()) AS lag_seconds FROM heartbeat WHERE id = 1; -``` - -### Common Replication Issues - -| Issue | Symptom | Fix | -|---|---|---| -| SQL thread stopped | `Last_SQL_Error` shows conflict | Skip GTID or fix data divergence | -| IO thread stopped | Network/auth error | Check connectivity, credentials, firewall | -| Large lag | `Seconds_Behind_Source` growing | Enable parallel replication, check slow queries on replica | -| GTID gaps | `Executed_Gtid_Set` has holes | Use `gtid_purged` to skip or clone from scratch | - -### Parallel Replication - -```sql --- Enable parallel replication to reduce lag on multi-threaded workloads. --- replica_parallel_workers: number of applier threads (default 4 in 8.0.27+). -SET GLOBAL replica_parallel_workers = 8; -SET GLOBAL replica_parallel_type = 'LOGICAL_CLOCK'; -SET GLOBAL replica_preserve_commit_order = ON; - -STOP REPLICA; START REPLICA; -``` - ---- - -## ProxySQL: Query Routing - -```sql --- ProxySQL configuration for read/write splitting. --- Connect to ProxySQL admin (port 6032). - --- Define hostgroups: 10 = writer (primary), 20 = reader (replicas). -INSERT INTO mysql_servers (hostgroup_id, hostname, port, max_connections) -VALUES (10, 'mysql-primary', 3306, 200), - (20, 'mysql-replica1', 3306, 200), - (20, 'mysql-replica2', 3306, 200); - --- Replication hostgroups for automatic failover detection. 
-INSERT INTO mysql_replication_hostgroups (writer_hostgroup, reader_hostgroup) -VALUES (10, 20); - --- Query rules: route reads to replicas, writes to primary. -INSERT INTO mysql_query_rules (rule_id, active, match_pattern, destination_hostgroup, apply) -VALUES (1, 1, '^SELECT.*FOR UPDATE', 10, 1), -- SELECT FOR UPDATE -> primary - (2, 1, '^SELECT', 20, 1), -- other SELECTs -> replicas - (3, 1, '.*', 10, 1); -- everything else -> primary - --- Add application user. -INSERT INTO mysql_users (username, password, default_hostgroup) -VALUES ('app_user', 'secret', 10); - --- Apply configuration. -LOAD MYSQL SERVERS TO RUNTIME; SAVE MYSQL SERVERS TO DISK; -LOAD MYSQL QUERY RULES TO RUNTIME; SAVE MYSQL QUERY RULES TO DISK; -LOAD MYSQL USERS TO RUNTIME; SAVE MYSQL USERS TO DISK; -``` - ---- - -## Official References - -- Replication: -- Group Replication: -- InnoDB Cluster: -- InnoDB ClusterSet: -- ProxySQL: diff --git a/plugins/flow/skills/mysql/references/security.md b/plugins/flow/skills/mysql/references/security.md deleted file mode 100644 index 81a6624..0000000 --- a/plugins/flow/skills/mysql/references/security.md +++ /dev/null @@ -1,304 +0,0 @@ -# Security - -## Overview - -This reference covers MySQL security essentials: user and role management, authentication plugins, encrypted connections, data-at-rest encryption, auditing, and SQL injection prevention. - ---- - -## User Management - -### CREATE USER - -```sql --- Create a user with caching_sha2_password (default in 8.0+). -CREATE USER 'app_user'@'10.0.%' IDENTIFIED BY 'StrongP@ss123!'; - --- Limit resources. -CREATE USER 'api_svc'@'%' - IDENTIFIED BY 'secret' - WITH MAX_QUERIES_PER_HOUR 10000 - MAX_CONNECTIONS_PER_HOUR 500 - MAX_USER_CONNECTIONS 20; - --- Account locking and password expiry. 
-CREATE USER 'temp_user'@'%' - IDENTIFIED BY 'temp123' - PASSWORD EXPIRE INTERVAL 30 DAY - FAILED_LOGIN_ATTEMPTS 5 - PASSWORD_LOCK_TIME 1; -- lock for 1 day after 5 failures (8.0.19+) -``` - -### GRANT / REVOKE - -```sql --- Grant database-level privileges. -GRANT SELECT, INSERT, UPDATE, DELETE ON mydb.* TO 'app_user'@'10.0.%'; - --- Grant table-level privileges. -GRANT SELECT ON mydb.public_reports TO 'readonly'@'%'; - --- Grant with GRANT OPTION (user can grant their privileges to others). -GRANT ALL PRIVILEGES ON mydb.* TO 'admin'@'localhost' WITH GRANT OPTION; - --- Revoke. -REVOKE DELETE ON mydb.* FROM 'app_user'@'10.0.%'; - --- Show grants. -SHOW GRANTS FOR 'app_user'@'10.0.%'; -``` - -### Roles (8.0+) - -```sql --- Create roles. -CREATE ROLE 'app_read', 'app_write', 'app_admin'; - --- Grant privileges to roles. -GRANT SELECT ON mydb.* TO 'app_read'; -GRANT INSERT, UPDATE, DELETE ON mydb.* TO 'app_write'; -GRANT ALL PRIVILEGES ON mydb.* TO 'app_admin'; - --- Assign roles to users. -GRANT 'app_read', 'app_write' TO 'app_user'@'10.0.%'; -GRANT 'app_admin' TO 'dba'@'localhost'; - --- Roles must be activated in the session. -SET DEFAULT ROLE ALL TO 'app_user'@'10.0.%'; -- auto-activate on login - --- Or activate manually per session. -SET ROLE 'app_read', 'app_write'; - --- Check active roles. -SELECT CURRENT_ROLE(); -``` - ---- - -## Authentication Plugins - -| Plugin | Default In | Security | Notes | -|---|---|---|---| -| `caching_sha2_password` | 8.0+ | Strong (SHA-256) | Requires SSL or RSA for first auth | -| `mysql_native_password` | 5.7 | Moderate (SHA-1) | Deprecated in 8.0; still available | -| `auth_socket` | Linux | OS-level | Authenticates via Unix socket peer credentials | -| `authentication_ldap_simple` | Enterprise | LDAP | Delegates auth to LDAP/Active Directory | -| `authentication_kerberos` | 8.0.26+ Enterprise | Kerberos | SSO with Kerberos | - -```sql --- Check a user's auth plugin. 
-SELECT user, host, plugin FROM mysql.user WHERE user = 'app_user'; - --- Change auth plugin. -ALTER USER 'legacy_app'@'%' IDENTIFIED WITH mysql_native_password BY 'secret'; - --- caching_sha2_password requires either: --- 1. SSL/TLS connection, or --- 2. RSA key exchange (client sends password encrypted with server's public key). --- Get the server's public key: --- mysql --get-server-public-key -u app_user -p -``` - ---- - -## SSL/TLS Encrypted Connections - -### Server Setup - -```ini -# my.cnf -[mysqld] -ssl-ca = /etc/mysql/ssl/ca-cert.pem -ssl-cert = /etc/mysql/ssl/server-cert.pem -ssl-key = /etc/mysql/ssl/server-key.pem - -# Require TLS 1.2+. -tls_version = TLSv1.2,TLSv1.3 - -# Require encrypted connections from all clients. -require_secure_transport = ON -``` - -### Verify SSL Status - -```sql -SHOW VARIABLES LIKE 'have_ssl'; -- YES if SSL is available -SHOW VARIABLES LIKE 'tls_version'; -SHOW STATUS LIKE 'Ssl_cipher'; -- shows cipher for current connection -SHOW STATUS LIKE 'Ssl_version'; -``` - -### Per-User SSL Requirements - -```sql --- Require any SSL connection. -ALTER USER 'app_user'@'%' REQUIRE SSL; - --- Require specific cipher. -ALTER USER 'app_user'@'%' REQUIRE CIPHER 'ECDHE-RSA-AES256-GCM-SHA384'; - --- Require client certificate (mutual TLS). -ALTER USER 'app_user'@'%' REQUIRE X509; - --- Require specific certificate attributes. -ALTER USER 'app_user'@'%' - REQUIRE SUBJECT '/CN=app_user/O=MyCompany' - AND ISSUER '/CN=MyCA/O=MyCompany'; -``` - ---- - -## Data-at-Rest Encryption - -### InnoDB Tablespace Encryption - -```sql --- Enable the keyring plugin (required for encryption). --- In my.cnf: --- early-plugin-load=keyring_file.so --- keyring_file_data=/var/lib/mysql-keyring/keyring - --- Encrypt a tablespace. -ALTER TABLE sensitive_data ENCRYPTION = 'Y'; - --- Create encrypted table. -CREATE TABLE secrets ( - id INT PRIMARY KEY, - data VARBINARY(1000) -) ENCRYPTION = 'Y'; - --- Encrypt the system tablespace and redo/undo logs (8.0.16+). 
-ALTER INSTANCE ROTATE INNODB MASTER KEY; - --- Check encryption status. -SELECT TABLE_SCHEMA, TABLE_NAME, CREATE_OPTIONS - FROM information_schema.TABLES - WHERE CREATE_OPTIONS LIKE '%ENCRYPTION%'; -``` - -### Keyring Plugins - -| Plugin | Type | Notes | -|---|---|---| -| `keyring_file` | File-based | Development only; keys in plaintext file | -| `keyring_encrypted_file` | File-based | Password-protected keyfile | -| `keyring_okv` | Oracle Key Vault | Enterprise; centralized key management | -| `keyring_aws` | AWS KMS | Enterprise; AWS-managed keys | -| `keyring_hashicorp` | HashiCorp Vault | Enterprise; Vault integration | - ---- - -## Audit - -### MySQL Enterprise Audit - -```sql --- Enterprise Audit plugin (commercial license). -INSTALL PLUGIN audit_log SONAME 'audit_log.so'; - --- Filter by event type. -SET GLOBAL audit_log_policy = 'LOGINS'; -- ALL | LOGINS | QUERIES | NONE - --- JSON format for structured parsing. -SET GLOBAL audit_log_format = 'JSON'; -``` - -### Community Alternatives - -```sql --- MariaDB Audit Plugin (works with MySQL 5.7, community alternative). -INSTALL PLUGIN server_audit SONAME 'server_audit.so'; -SET GLOBAL server_audit_events = 'CONNECT,QUERY_DDL,QUERY_DML'; -SET GLOBAL server_audit_logging = ON; - --- Percona Audit Log Plugin (Percona Server only). --- Enabled at compile time; configured via my.cnf. -``` - -### General Query Log (Development Only) - -```sql --- Logs ALL queries. Massive performance impact; never use in production. -SET GLOBAL general_log = ON; -SET GLOBAL log_output = 'TABLE'; -- or 'FILE' -SELECT * FROM mysql.general_log ORDER BY event_time DESC LIMIT 20; -``` - ---- - -## SQL Injection Prevention - -### Prepared Statements (Always) - -```python -# Python: parameterized query (SAFE). 
-cursor.execute("SELECT * FROM users WHERE email = %s AND status = %s", (email, "active")) - -# NEVER do this (VULNERABLE): -cursor.execute(f"SELECT * FROM users WHERE email = '{email}'") -``` - -```javascript -// Node.js mysql2: prepared statement (SAFE). -const [rows] = await pool.execute('SELECT * FROM users WHERE email = ? AND status = ?', [email, 'active']); - -// NEVER do this (VULNERABLE): -const [rows] = await pool.query(`SELECT * FROM users WHERE email = '${email}'`); -``` - -```java -// Java: PreparedStatement (SAFE). -PreparedStatement ps = conn.prepareStatement("SELECT * FROM users WHERE email = ? AND status = ?"); -ps.setString(1, email); -ps.setString(2, "active"); -``` - -### Why mysql_real_escape_string Is Not Enough - -- Escaping is error-prone: developers forget it, or use it inconsistently. -- Character set mismatches can bypass escaping (GBK multibyte injection). -- Prepared statements send the query structure and data separately; the server never parses user input as SQL. -- Modern ORMs use prepared statements by default. If writing raw SQL, always parameterize. - -### Defense in Depth - -1. **Prepared statements** for all user input in queries. -2. **Least-privilege accounts** — app users should not have `DROP`, `FILE`, `SUPER`, or `GRANT` privileges. -3. **Input validation** at the application layer (type checks, length limits, allowlists). -4. **WAF rules** as an additional layer (not a replacement for parameterized queries). -5. **Disable `LOCAL INFILE`** unless explicitly needed: `SET GLOBAL local_infile = OFF`. - ---- - -## Password Policies - -### validate_password Component - -```sql --- Install the validate_password component (8.0+). -INSTALL COMPONENT 'file://component_validate_password'; - --- Configure policy. 
-SET GLOBAL validate_password.policy = STRONG; -- LOW | MEDIUM | STRONG -SET GLOBAL validate_password.length = 12; -SET GLOBAL validate_password.mixed_case_count = 1; -SET GLOBAL validate_password.number_count = 1; -SET GLOBAL validate_password.special_char_count = 1; - --- Check current policy. -SHOW VARIABLES LIKE 'validate_password%'; - --- Test password strength. -SELECT VALIDATE_PASSWORD_STRENGTH('MyP@ss123!') AS strength; --- Returns 0-100 score. -``` - ---- - -## Official References - -- MySQL Security: -- Authentication Plugins: -- Encrypted Connections: -- InnoDB Encryption: -- validate_password: diff --git a/plugins/flow/skills/mysql/references/sql_patterns.md b/plugins/flow/skills/mysql/references/sql_patterns.md deleted file mode 100644 index dd0e9a0..0000000 --- a/plugins/flow/skills/mysql/references/sql_patterns.md +++ /dev/null @@ -1,267 +0,0 @@ -# MySQL SQL Patterns - -## Overview - -Use this reference when writing non-trivial MySQL SQL: window functions, CTEs, JSON shredding, upserts, regex, or generated columns. Every pattern targets MySQL 8.0+ unless noted otherwise. - ---- - -## Window Functions (8.0+) - -Window functions compute values across a set of rows related to the current row without collapsing the result set. - -### ROW_NUMBER, RANK, DENSE_RANK - -```sql --- ROW_NUMBER assigns a unique sequential integer per partition. --- Use it to get exactly one row per group (e.g., latest order per customer). -SELECT customer_id, order_date, total, - ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS rn - FROM orders; - --- RANK leaves gaps after ties (1,1,3); DENSE_RANK does not (1,1,2). -SELECT product_id, revenue, - RANK() OVER (ORDER BY revenue DESC) AS revenue_rank, - DENSE_RANK() OVER (ORDER BY revenue DESC) AS revenue_dense - FROM product_sales; -``` - -### LAG / LEAD - -```sql --- LAG looks backward; LEAD looks forward within the window. --- Avoids self-joins for comparing adjacent rows. 
-SELECT trade_date, close_price, - LAG(close_price) OVER (ORDER BY trade_date) AS prev_close, - close_price - LAG(close_price) OVER (ORDER BY trade_date) AS daily_change - FROM stock_prices - WHERE ticker = 'MSFT'; -``` - -### Running Totals - -```sql --- Windowed SUM with ORDER BY produces a running total. -SELECT txn_date, amount, - SUM(amount) OVER (ORDER BY txn_date - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_balance - FROM ledger - WHERE account_id = 1001; -``` - -### NTILE - -```sql --- NTILE distributes rows into N roughly equal buckets. --- Use for percentile segmentation (quartiles, deciles). -SELECT employee_id, salary, - NTILE(4) OVER (ORDER BY salary) AS salary_quartile - FROM employees; -``` - ---- - -## Common Table Expressions (CTEs) - -### Basic CTE - -```sql --- CTEs improve readability by naming intermediate result sets. -WITH active_users AS ( - SELECT id, name, email - FROM users - WHERE status = 'active' - AND last_login > NOW() - INTERVAL 30 DAY -) -SELECT au.name, COUNT(o.id) AS order_count - FROM active_users au - JOIN orders o ON o.user_id = au.id - GROUP BY au.name; -``` - -### Recursive CTEs - -```sql --- Walk a tree structure (e.g., org chart, category hierarchy). -WITH RECURSIVE org_tree AS ( - -- Anchor: top-level managers - SELECT id, name, manager_id, 1 AS depth - FROM employees - WHERE manager_id IS NULL - - UNION ALL - - -- Recursive step - SELECT e.id, e.name, e.manager_id, ot.depth + 1 - FROM employees e - JOIN org_tree ot ON e.manager_id = ot.id -) -SELECT CONCAT(REPEAT(' ', depth - 1), name) AS org_chart, depth - FROM org_tree - ORDER BY depth, name; -``` - -### Recursive CTE for Date Series - -```sql --- Generate a continuous date series (useful for gap-filling reports). 
-WITH RECURSIVE dates AS ( - SELECT DATE('2026-01-01') AS dt - UNION ALL - SELECT dt + INTERVAL 1 DAY FROM dates WHERE dt < '2026-03-31' -) -SELECT d.dt, COALESCE(SUM(o.total), 0) AS daily_revenue - FROM dates d - LEFT JOIN orders o ON DATE(o.created_at) = d.dt - GROUP BY d.dt; -``` - ---- - -## JSON_TABLE (8.0+) - -```sql --- Shred a JSON array into relational rows. -SELECT p.id, p.name, jt.tag - FROM products p, - JSON_TABLE(p.tags, '$[*]' COLUMNS ( - tag VARCHAR(100) PATH '$' - )) AS jt; - --- Nested JSON with multiple columns. -SELECT o.id, items.* - FROM orders o, - JSON_TABLE(o.line_items, '$[*]' COLUMNS ( - product_id INT PATH '$.product_id', - quantity INT PATH '$.qty', - unit_price DECIMAL(10,2) PATH '$.price', - NESTED PATH '$.discounts[*]' COLUMNS ( - discount_pct DECIMAL(5,2) PATH '$.percent' - ) - )) AS items; -``` - ---- - -## INSERT ... ON DUPLICATE KEY UPDATE (Upsert) - -```sql --- MySQL's upsert pattern. Requires a UNIQUE or PRIMARY KEY constraint. -INSERT INTO metrics (metric_key, value, updated_at) -VALUES ('page_views', 1, NOW()) -ON DUPLICATE KEY UPDATE - value = value + VALUES(value), - updated_at = NOW(); - --- Bulk upsert with VALUES row alias (8.0.19+). -INSERT INTO inventory (sku, warehouse_id, qty) -VALUES ('ABC', 1, 10), ('DEF', 1, 5) - AS new_vals -ON DUPLICATE KEY UPDATE - qty = inventory.qty + new_vals.qty; -``` - -### REPLACE INTO vs INSERT IGNORE - -```sql --- REPLACE deletes the conflicting row then inserts. Resets auto-increment, --- fires DELETE + INSERT triggers. Prefer ON DUPLICATE KEY UPDATE instead. -REPLACE INTO settings (user_id, theme) VALUES (42, 'dark'); - --- INSERT IGNORE silently discards rows that violate unique constraints. --- Also suppresses other errors (type truncation) — use with caution. -INSERT IGNORE INTO tags (name) VALUES ('mysql'), ('postgres'), ('mysql'); -``` - ---- - -## GROUP_CONCAT / JSON_ARRAYAGG - -```sql --- GROUP_CONCAT concatenates values into a comma-separated string. 
--- Default max length is 1024; increase with group_concat_max_len. -SELECT department_id, - GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') AS team_members - FROM employees - GROUP BY department_id; - --- JSON_ARRAYAGG returns a proper JSON array (8.0+). -SELECT department_id, - JSON_ARRAYAGG(name) AS team_members_json - FROM employees - GROUP BY department_id; - --- JSON_OBJECTAGG for key-value aggregation. -SELECT JSON_OBJECTAGG(setting_key, setting_value) AS config - FROM app_settings - WHERE app_id = 1; -``` - ---- - -## Lateral Derived Tables (8.0.14+) - -```sql --- LATERAL allows the derived table to reference columns from preceding tables. --- Use for top-N-per-group without window function workarounds. -SELECT c.name, top_orders.* - FROM customers c, - LATERAL ( - SELECT o.id, o.total, o.created_at - FROM orders o - WHERE o.customer_id = c.id - ORDER BY o.total DESC - LIMIT 3 - ) AS top_orders; -``` - ---- - -## REGEXP_REPLACE / REGEXP_SUBSTR (8.0+) - -```sql --- REGEXP_REPLACE: replace pattern matches within a string. -SELECT REGEXP_REPLACE('foo bar baz', '\\s+', ' ') AS cleaned; --- Result: 'foo bar baz' - --- REGEXP_SUBSTR: extract the first match. -SELECT REGEXP_SUBSTR('Order #12345 placed', '#[0-9]+') AS order_ref; --- Result: '#12345' - --- REGEXP_LIKE: boolean pattern match (replaces RLIKE in conditions). -SELECT * FROM products WHERE REGEXP_LIKE(sku, '^[A-Z]{2}-[0-9]{4}$'); -``` - ---- - -## Generated Columns (Stored and Virtual) - -```sql --- Virtual column: computed on read, not stored on disk. --- Useful for indexing computed expressions without storing redundant data. -ALTER TABLE users - ADD full_name VARCHAR(200) AS (CONCAT(first_name, ' ', last_name)) VIRTUAL; - --- Stored column: computed on write, persisted to disk. --- Required when the expression is non-deterministic or you need it in a --- FOREIGN KEY constraint. 
-ALTER TABLE orders - ADD total_with_tax DECIMAL(12,2) AS (subtotal * (1 + tax_rate)) STORED; - --- Index a generated column for fast lookups. -CREATE INDEX idx_users_full_name ON users (full_name); - --- Generated column on JSON for indexing JSON values (common pattern). -ALTER TABLE products - ADD category VARCHAR(100) AS (JSON_UNQUOTE(JSON_EXTRACT(attrs, '$.category'))) VIRTUAL; -CREATE INDEX idx_products_category ON products (category); -``` - ---- - -## Official References - -- MySQL 8.0 SQL Statement Reference: -- Window Functions: -- JSON Functions: -- Generated Columns: diff --git a/plugins/flow/skills/mysql/references/stored_procedures.md b/plugins/flow/skills/mysql/references/stored_procedures.md deleted file mode 100644 index 229678e..0000000 --- a/plugins/flow/skills/mysql/references/stored_procedures.md +++ /dev/null @@ -1,387 +0,0 @@ -# Stored Procedures & Functions - -## Overview - -MySQL stored routines encapsulate reusable SQL logic on the server side. This reference covers procedures, functions, control flow, error handling, cursors, triggers, and scheduled events. - ---- - -## CREATE PROCEDURE / CREATE FUNCTION - -```sql --- Procedure: may return multiple result sets, uses OUT/INOUT parameters. -DELIMITER $$ -CREATE PROCEDURE get_customer_orders( - IN p_customer_id INT, - OUT p_order_count INT -) -BEGIN - SELECT COUNT(*) INTO p_order_count - FROM orders - WHERE customer_id = p_customer_id; - - -- Return a result set as well. - SELECT id, total, created_at - FROM orders - WHERE customer_id = p_customer_id - ORDER BY created_at DESC; -END$$ -DELIMITER ; - --- Call the procedure. -CALL get_customer_orders(42, @cnt); -SELECT @cnt AS order_count; -``` - -```sql --- Function: returns a single scalar value. Must be deterministic or --- declared with appropriate characteristics for replication safety. 
-DELIMITER $$ -CREATE FUNCTION calculate_tax( - subtotal DECIMAL(12,2), - tax_rate DECIMAL(5,4) -) -RETURNS DECIMAL(12,2) -DETERMINISTIC -NO SQL -BEGIN - RETURN subtotal * tax_rate; -END$$ -DELIMITER ; - --- Use in queries. -SELECT id, subtotal, calculate_tax(subtotal, 0.0825) AS tax FROM orders; -``` - ---- - -## Variables and Control Flow - -### Variables - -```sql -DELIMITER $$ -CREATE PROCEDURE demo_variables() -BEGIN - -- Local variables (must be declared before any other statements). - DECLARE v_count INT DEFAULT 0; - DECLARE v_name VARCHAR(100); - DECLARE v_done BOOLEAN DEFAULT FALSE; - - SET v_count = 10; - SELECT name INTO v_name FROM users WHERE id = 1; -END$$ -DELIMITER ; -``` - -### IF / CASE - -```sql -DELIMITER $$ -CREATE PROCEDURE categorize_order(IN p_total DECIMAL(12,2), OUT p_tier VARCHAR(20)) -BEGIN - IF p_total >= 1000 THEN - SET p_tier = 'premium'; - ELSEIF p_total >= 100 THEN - SET p_tier = 'standard'; - ELSE - SET p_tier = 'basic'; - END IF; -END$$ -DELIMITER ; -``` - -```sql --- CASE expression in a procedure. -DELIMITER $$ -CREATE FUNCTION order_priority(status VARCHAR(20)) -RETURNS INT -DETERMINISTIC -NO SQL -BEGIN - RETURN CASE status - WHEN 'urgent' THEN 1 - WHEN 'high' THEN 2 - WHEN 'normal' THEN 3 - ELSE 4 - END; -END$$ -DELIMITER ; -``` - -### LOOP / WHILE / REPEAT - -```sql -DELIMITER $$ -CREATE PROCEDURE loop_examples() -BEGIN - DECLARE v_i INT DEFAULT 0; - - -- WHILE loop - WHILE v_i < 10 DO - SET v_i = v_i + 1; - END WHILE; - - -- REPEAT loop (executes at least once, like do-while) - SET v_i = 0; - REPEAT - SET v_i = v_i + 1; - UNTIL v_i >= 10 - END REPEAT; - - -- LOOP with labeled LEAVE (break) and ITERATE (continue) - SET v_i = 0; - my_loop: LOOP - SET v_i = v_i + 1; - IF v_i = 5 THEN - ITERATE my_loop; -- skip to next iteration - END IF; - IF v_i >= 10 THEN - LEAVE my_loop; -- exit loop - END IF; - END LOOP my_loop; -END$$ -DELIMITER ; -``` - ---- - -## Cursors - -```sql --- Cursors iterate row-by-row over a result set. 
--- Always declare the NOT FOUND handler to detect end-of-cursor. -DELIMITER $$ -CREATE PROCEDURE process_inactive_users() -BEGIN - DECLARE v_user_id INT; - DECLARE v_email VARCHAR(255); - DECLARE v_done BOOLEAN DEFAULT FALSE; - - DECLARE cur CURSOR FOR - SELECT id, email FROM users - WHERE last_login < NOW() - INTERVAL 1 YEAR; - - DECLARE CONTINUE HANDLER FOR NOT FOUND SET v_done = TRUE; - - OPEN cur; - - read_loop: LOOP - FETCH cur INTO v_user_id, v_email; - IF v_done THEN - LEAVE read_loop; - END IF; - - -- Process each row. - INSERT INTO inactive_user_log (user_id, email, logged_at) - VALUES (v_user_id, v_email, NOW()); - END LOOP; - - CLOSE cur; -END$$ -DELIMITER ; -``` - ---- - -## Error Handling - -### DECLARE HANDLER - -```sql -DELIMITER $$ -CREATE PROCEDURE safe_insert(IN p_name VARCHAR(100)) -BEGIN - -- Handler for duplicate key violation (SQLSTATE '23000', errno 1062). - DECLARE EXIT HANDLER FOR 1062 - BEGIN - SELECT CONCAT('Duplicate entry for: ', p_name) AS error_message; - END; - - -- Handler for any SQL exception. - DECLARE EXIT HANDLER FOR SQLEXCEPTION - BEGIN - GET DIAGNOSTICS CONDITION 1 - @err_no = MYSQL_ERRNO, - @err_msg = MESSAGE_TEXT; - SELECT @err_no AS errno, @err_msg AS message; - ROLLBACK; - END; - - START TRANSACTION; - INSERT INTO categories (name) VALUES (p_name); - COMMIT; -END$$ -DELIMITER ; -``` - -### SIGNAL / RESIGNAL - -```sql -DELIMITER $$ -CREATE PROCEDURE validate_age(IN p_age INT) -BEGIN - IF p_age < 0 OR p_age > 150 THEN - -- Raise a custom error. SQLSTATE '45000' = user-defined condition. - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'Age must be between 0 and 150', - MYSQL_ERRNO = 50001; - END IF; -END$$ -DELIMITER ; -``` - -```sql --- RESIGNAL re-raises the current exception, optionally modifying it. 
-DELIMITER $$ -CREATE PROCEDURE wrapped_insert() -BEGIN - DECLARE EXIT HANDLER FOR SQLEXCEPTION - BEGIN - RESIGNAL SET MESSAGE_TEXT = 'wrapped_insert failed'; - END; - - INSERT INTO audit_log (event) VALUES ('test'); -END$$ -DELIMITER ; -``` - -### Common SQLSTATE Codes - -| SQLSTATE | Meaning | MySQL Errno | -|----------|-----------------------------|-------------| -| `02000` | No data / NOT FOUND | 1329 | -| `23000` | Integrity constraint violation | 1062, 1452 | -| `40001` | Deadlock / serialization failure | 1213 | -| `42000` | Syntax error / access denied | 1064, 1044 | -| `45000` | User-defined condition | (custom) | -| `HY000` | General error | (varies) | - ---- - -## Prepared Statements in Stored Procedures - -```sql --- Use PREPARE/EXECUTE for dynamic SQL inside procedures. --- Always deallocate to avoid memory leaks. -DELIMITER $$ -CREATE PROCEDURE dynamic_count(IN p_table VARCHAR(64), OUT p_count BIGINT) -BEGIN - -- Validate identifier to prevent SQL injection. - IF p_table NOT REGEXP '^[a-zA-Z_][a-zA-Z0-9_]*$' THEN - SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Invalid table name'; - END IF; - - SET @sql = CONCAT('SELECT COUNT(*) INTO @cnt FROM `', p_table, '`'); - PREPARE stmt FROM @sql; - EXECUTE stmt; - DEALLOCATE PREPARE stmt; - SET p_count = @cnt; -END$$ -DELIMITER ; -``` - ---- - -## Triggers - -```sql --- BEFORE INSERT: validate or transform data before it hits the table. -DELIMITER $$ -CREATE TRIGGER trg_orders_before_insert -BEFORE INSERT ON orders -FOR EACH ROW -BEGIN - IF NEW.total < 0 THEN - SIGNAL SQLSTATE '45000' - SET MESSAGE_TEXT = 'Order total cannot be negative'; - END IF; - SET NEW.created_at = COALESCE(NEW.created_at, NOW()); -END$$ -DELIMITER ; - --- AFTER UPDATE: audit trail. 
-DELIMITER $$ -CREATE TRIGGER trg_orders_after_update -AFTER UPDATE ON orders -FOR EACH ROW -BEGIN - IF OLD.status <> NEW.status THEN - INSERT INTO order_audit (order_id, old_status, new_status, changed_at) - VALUES (NEW.id, OLD.status, NEW.status, NOW()); - END IF; -END$$ -DELIMITER ; - --- AFTER DELETE: cascade cleanup. -DELIMITER $$ -CREATE TRIGGER trg_users_after_delete -AFTER DELETE ON users -FOR EACH ROW -BEGIN - DELETE FROM user_preferences WHERE user_id = OLD.id; -END$$ -DELIMITER ; -``` - -**Trigger limitations:** MySQL triggers cannot call stored procedures that return result sets, cannot use PREPARE/EXECUTE, and only one trigger per timing/event combination per table (before 5.7.2, extended in 8.0). - ---- - -## Events (Scheduled Tasks) - -```sql --- Enable the event scheduler (must be ON at the server level). -SET GLOBAL event_scheduler = ON; - --- Create a recurring event to purge old logs every day at midnight. -CREATE EVENT evt_purge_old_logs -ON SCHEDULE EVERY 1 DAY -STARTS '2026-03-27 00:00:00' -DO - DELETE FROM application_logs WHERE created_at < NOW() - INTERVAL 90 DAY; - --- One-time event. -CREATE EVENT evt_one_time_cleanup -ON SCHEDULE AT '2026-04-01 03:00:00' -DO - CALL archive_old_orders(); - --- Alter or disable an event. -ALTER EVENT evt_purge_old_logs DISABLE; - --- Drop an event. -DROP EVENT IF EXISTS evt_one_time_cleanup; -``` - ---- - -## Best Practices and Anti-Patterns - -### Best Practices - -- Keep procedures focused: one procedure, one responsibility. -- Use transactions explicitly (`START TRANSACTION` / `COMMIT` / `ROLLBACK`). -- Always declare error handlers to prevent silent failures. -- Use `DETERMINISTIC` / `NO SQL` / `READS SQL DATA` characteristics accurately for replication and optimizer. -- Validate dynamic identifiers with regex before string concatenation. -- Prefer set-based operations over cursors; cursors process row-by-row and are significantly slower. 
- -### Anti-Patterns - -- **Cursors for batch processing.** Use `INSERT ... SELECT`, `UPDATE ... JOIN`, or `DELETE ... JOIN` instead. -- **Nested cursors.** Refactor into JOINs or temporary tables. -- **Missing DEALLOCATE PREPARE.** Leaks memory from the prepared statement cache. -- **Business logic in triggers.** Triggers are invisible to application developers and make debugging harder. Prefer application-layer logic or stored procedures called explicitly. -- **SELECT * in stored procedures.** Fragile if table schema changes; always list columns explicitly. -- **Ignoring sql_mode.** Procedures written under lax sql_mode may break under `STRICT_TRANS_TABLES`. Always develop with strict mode enabled. - ---- - -## Official References - -- CREATE PROCEDURE: -- CREATE TRIGGER: -- CREATE EVENT: -- Error Handling: -- SIGNAL/RESIGNAL: diff --git a/plugins/flow/skills/nuxt/SKILL.md b/plugins/flow/skills/nuxt/SKILL.md deleted file mode 100644 index d223629..0000000 --- a/plugins/flow/skills/nuxt/SKILL.md +++ /dev/null @@ -1,285 +0,0 @@ ---- -name: nuxt -description: "Use when editing Nuxt apps, nuxt.config.ts, nuxt.config.js, .nuxt directories, useFetch, useAsyncData, Nitro server routes, SSR, SSG, or Vue server rendering with Nuxt." 
---- - -# Nuxt 3 Framework Skill - - - -## Quick Reference - -### Page Component - - - -```vue - - - - -``` - - - -### Server API Routes - - - -```typescript -// server/api/users/[id].get.ts -export default defineEventHandler(async (event) => { - const id = getRouterParam(event, 'id'); - - const user = await db.users.findUnique({ where: { id } }); - - if (!user) { - throw createError({ - statusCode: 404, - message: 'User not found', - }); - } - - return user; -}); - -// server/api/users.post.ts -export default defineEventHandler(async (event) => { - const body = await readBody(event); - - const user = await db.users.create({ data: body }); - - return user; -}); -``` - - - -### Composables - - - -```typescript -// composables/useAuth.ts -export function useAuth() { - const user = useState('auth-user', () => null); - const isAuthenticated = computed(() => !!user.value); - - async function login(credentials: Credentials) { - const { data } = await useFetch('/api/auth/login', { - method: 'POST', - body: credentials, - }); - user.value = data.value; - } - - async function logout() { - await useFetch('/api/auth/logout', { method: 'POST' }); - user.value = null; - navigateTo('/login'); - } - - return { user, isAuthenticated, login, logout }; -} -``` - - - -### Data Fetching - - - -```vue - -``` - - - -### Middleware - - - -```typescript -// middleware/auth.ts -export default defineNuxtRouteMiddleware((to, from) => { - const { isAuthenticated } = useAuth(); - - if (!isAuthenticated.value && to.path !== '/login') { - return navigateTo('/login'); - } -}); - -// middleware/admin.ts (named middleware) -export default defineNuxtRouteMiddleware(() => { - const { user } = useAuth(); - - if (user.value?.role !== 'admin') { - throw createError({ - statusCode: 403, - message: 'Forbidden', - }); - } -}); -``` - - - -### Plugins - - - -```typescript -// plugins/api.ts -export default defineNuxtPlugin(() => { - const api = $fetch.create({ - baseURL: '/api', - onRequest({ options }) { - 
const token = useCookie('token'); - if (token.value) { - options.headers = { - ...options.headers, - Authorization: `Bearer ${token.value}`, - }; - } - }, - }); - - return { - provide: { api }, - }; -}); - -// Usage: const { $api } = useNuxtApp(); -``` - - - -### Hybrid Rendering - - - -```typescript -// nuxt.config.ts -export default defineNuxtConfig({ - routeRules: { - '/': { prerender: true }, - '/blog/**': { isr: 3600 }, // ISR: revalidate every hour - '/admin/**': { ssr: false }, // SPA mode - '/api/**': { cors: true }, - }, -}); -``` - - - -### State Management - - - -```typescript -// With useState (SSR-safe) -const count = useState('counter', () => 0); - -// With Pinia -// stores/user.ts -export const useUserStore = defineStore('user', () => { - const user = ref(null); - - async function fetch() { - user.value = await $fetch('/api/user'); - } - - return { user, fetch }; -}); -``` - - - -## Best Practices - -- Use `useFetch` for data fetching (handles SSR) -- Use `useState` for SSR-safe reactive state -- Use route rules for hybrid rendering strategies -- Use server routes for backend logic -- Use middleware for route guards -- Use `definePageMeta` for page-level config - - - -## References Index - -- **[Litestar-Vite Integration](references/litestar_vite.md)** — Backend integration with Litestar-Vite plugin. - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [TypeScript](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/typescript.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -- **Use `useFetch` or `useAsyncData` for data fetching** -- These composables are SSR-aware and prevent duplicate requests on the client. 
Never use plain `$fetch` in a component's top-level setup. -- **Never access browser-only globals during SSR** -- Always check `import.meta.client` or use `onMounted` before accessing `window`, `document`, or `localStorage`. -- **Use `server/` directory for sensitive operations** -- Keep database queries, API keys, and complex logic in Nitro server routes to ensure they never leak to the client. -- **Always provide a unique key to `useAsyncData`** -- This is critical for proper hydration and preventing data mismatch between server and client. -- **Prefer `useState` over local refs for global state** -- `useState` is SSR-safe and preserves state during hydration. - - - -## Validation Checkpoint - -- [ ] `useFetch` or `useAsyncData` is used for all top-level data fetching -- [ ] No browser-only globals are accessed in the setup script without checks -- [ ] Sensitive logic and API calls are moved to the `server/api/` directory -- [ ] `useAsyncData` calls have unique and stable keys -- [ ] `definePageMeta` is used for route-level guards and layouts -- [ ] Components that require browser APIs are wrapped in `` or used within `onMounted` - diff --git a/plugins/flow/skills/nuxt/agents/openai.yaml b/plugins/flow/skills/nuxt/agents/openai.yaml deleted file mode 100644 index 2353e21..0000000 --- a/plugins/flow/skills/nuxt/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Nuxt" - short_description: "Nuxt 3 SSR, SSG, Nitro routes, config, data fetching, and Vue integration" diff --git a/plugins/flow/skills/nuxt/references/litestar_vite.md b/plugins/flow/skills/nuxt/references/litestar_vite.md deleted file mode 100644 index 10753cb..0000000 --- a/plugins/flow/skills/nuxt/references/litestar_vite.md +++ /dev/null @@ -1,60 +0,0 @@ -# Litestar-Vite Integration (Framework Mode) - -## Setup with VitePlugin - -```python -# Python backend for Nuxt -from litestar import Litestar -from litestar_vite import ViteConfig, VitePlugin, RuntimeConfig - -vite_config 
= ViteConfig( - mode="framework", # Framework SSR mode - proxy_mode="proxy", # Proxy everything except Litestar routes - runtime=RuntimeConfig( - port=5173, - framework_port=3000, # Nuxt dev server port - ), -) - -app = Litestar(plugins=[VitePlugin(config=vite_config)]) -``` - -## Using Generated Types - -```typescript -// composables/useApi.ts -import { route } from '~/generated/routes'; -import type { components } from '~/generated/schemas'; - -type User = components['schemas']['User']; - -export function useUser(id: Ref) { - // Type-safe route building - return useFetch(() => route('users:get', { id: id.value })); -} -``` - -## Nuxt + Litestar API Routes - -```typescript -// nuxt.config.ts -export default defineNuxtConfig({ - nitro: { - devProxy: { - '/api': { - target: 'http://localhost:8000', - changeOrigin: true, - }, - }, - }, -}); -``` - -## CLI Commands - -```bash -litestar assets install # Install deps -litestar assets serve # Start Nuxt dev server -litestar assets build # Production build -litestar run # Start Litestar backend -``` diff --git a/plugins/flow/skills/oracle/SKILL.md b/plugins/flow/skills/oracle/SKILL.md deleted file mode 100644 index b4eaa9c..0000000 --- a/plugins/flow/skills/oracle/SKILL.md +++ /dev/null @@ -1,311 +0,0 @@ ---- -name: oracle -description: "Use when working with Oracle Database, Oracle SQL, PL/SQL, sqlplus, cx_Oracle, oracledb, ORDS, OCI drivers, Oracle containers, schema migrations, security, vectors, or performance tuning." ---- - -# Oracle Database - -Use this skill when working with Oracle Database in any capacity: OCI-based data paths (connect, execute, fetch, bind, transaction control), Instant Client configuration, or container-based Oracle 26ai workflows for dev/test/CI environments. 
- -## Quick Reference - -### Python Connection (oracledb thin mode) - -```python -import oracledb - -# Thin mode -- no Instant Client required -conn = oracledb.connect( - user="app_user", - password="secret", - dsn="host.example.com:1521/FREEPDB1", -) - -with conn.cursor() as cur: - # Always use bind variables - cur.execute( - "SELECT order_id, total FROM orders WHERE customer_id = :cid", - {"cid": 42}, - ) - rows = cur.fetchall() -``` - -### Connection Pooling - -```python -# Create pool at startup; reuse for process lifetime -pool = oracledb.create_pool( - user="app_user", password="secret", - dsn="host.example.com:1521/FREEPDB1", - min=2, max=10, increment=1, -) - -with pool.acquire() as conn: - with conn.cursor() as cur: - cur.execute("SELECT SYSDATE FROM dual") -``` - -### Java JDBC Connection - -```java -import oracle.jdbc.pool.OracleDataSource; - -OracleDataSource ods = new OracleDataSource(); -ods.setURL("jdbc:oracle:thin:@//host.example.com:1521/FREEPDB1"); -ods.setUser("app_user"); -ods.setPassword("secret"); - -try (Connection conn = ods.getConnection(); - PreparedStatement ps = conn.prepareStatement( - "SELECT * FROM orders WHERE customer_id = ?")) { - ps.setInt(1, 42); - try (ResultSet rs = ps.executeQuery()) { - while (rs.next()) { - System.out.println(rs.getInt("order_id")); - } - } -} -``` - -### Key PL/SQL Patterns - -```sql --- Package spec: public API contract -CREATE OR REPLACE PACKAGE order_api AS - SUBTYPE order_id_t IS orders.order_id%TYPE; - - PROCEDURE place_order( - p_customer_id IN customers.customer_id%TYPE, - p_items IN order_item_tab_t, - p_order_id OUT order_id_t - ); -END order_api; -/ - --- Exception handling with diagnostic capture -EXCEPTION - WHEN OTHERS THEN - log_pkg.error( - p_message => SQLERRM, - p_backtrace => DBMS_UTILITY.FORMAT_ERROR_BACKTRACE, - p_stack => DBMS_UTILITY.FORMAT_ERROR_STACK - ); - RAISE; -- re-raise after logging; never silently swallow -``` - -### ORDS REST API Basics - -```text -Module: /api/v1/ (base 
path) -Template: /api/v1/orders/ (collection) -Template: /api/v1/orders/:id (single item) -Handler: GET on /api/v1/orders/ -> SELECT query -Handler: POST on /api/v1/orders/ -> INSERT + RETURNING -``` - -```sql --- AutoREST: enable CRUD endpoints for a schema -BEGIN - ORDS.ENABLE_SCHEMA( - p_enabled => TRUE, - p_schema => 'APP_USER', - p_url_mapping_type => 'BASE_PATH', - p_url_mapping_pattern => 'app' - ); -END; -/ -``` - - - -## Workflow - -### Step 1: Identify the Pattern - -| Need | Reference | Key Concept | -| --- | --- | --- | -| Connect from Python | connections.md | oracledb thin/thick, pooling | -| Connect from Java | connections.md | JDBC thin, UCP | -| Write PL/SQL | plsql.md | Packages, BULK COLLECT, FORALL | -| SQL patterns | sql_patterns.md | Analytics, CTEs, MERGE, MODEL | -| REST APIs | ords.md | Modules, templates, handlers | -| JSON operations | json.md | JSON_VALUE, Duality Views (23ai+) | -| Container dev/test | containers.md | Podman, 26ai Free | -| Performance tuning | performance.md | EXPLAIN PLAN, AWR, indexes | -| Vector/AI search | vectors.md | VECTOR type, IVF/HNSW indexes | -| Schema migrations | schema_migrations.md | Liquibase, EBR, DBMS_REDEFINITION | - -### Step 2: Implement - -1. Choose thin mode by default -- only use thick mode for Advanced Queuing, Kerberos, or LDAP -2. Create connection pools at startup; never create per-request connections -3. Use bind variables for all parameter values -- enables cursor sharing and prevents injection -4. Anchor PL/SQL parameter types with `%TYPE` / `%ROWTYPE` -5. Log exceptions with `FORMAT_ERROR_BACKTRACE` + `FORMAT_ERROR_STACK`, then re-raise - -### Step 3: Validate - -Run through the validation checkpoint below before considering the work complete. 
- - - - - -## Guardrails - -- **Always use bind variables**: `:param_name` syntax -- never concatenate values into SQL strings -- **Always use connection pooling**: `create_pool()` at startup, `pool.acquire()` per operation -- **Always re-raise exceptions after logging**: never silently swallow `WHEN OTHERS` -- **Always anchor PL/SQL types**: use `%TYPE` / `%ROWTYPE` so DDL changes propagate automatically -- **Use thick mode only when needed**: Advanced Queuing, Kerberos, LDAP, or Sharding -- thin mode is default and dependency-free -- **Use `RAISE_APPLICATION_ERROR`** for custom errors visible to SQL callers (range -20000 to -20999) -- **Never use implicit cursors for multi-row operations**: use BULK COLLECT/FORALL to minimize context switches -- **Never commit inside reusable PL/SQL packages**: let the caller control transaction boundaries - - - - - -### Validation Checkpoint - -Before delivering Oracle code, verify: - -- [ ] All queries use bind variables (`:param_name`) -- no string concatenation for values -- [ ] Connection pooling is configured with `min`/`max`/`increment` parameters -- [ ] Thick mode is only initialized when features require it (Advanced Queuing, Kerberos, etc.) -- [ ] PL/SQL exception handlers log backtrace + stack and re-raise (no silent swallowing) -- [ ] PL/SQL parameter types are anchored to table columns with `%TYPE` -- [ ] ORDS handlers use bind variables (`:id`) in SQL source, not concatenation - - - - - -## Example - -**Task:** "Create a PL/SQL stored procedure for order placement with proper error handling, and call it from Python with connection pooling." 
- -```sql --- PL/SQL: Package for order management -CREATE OR REPLACE PACKAGE order_api AS - SUBTYPE order_id_t IS orders.order_id%TYPE; - - gc_max_items CONSTANT PLS_INTEGER := 500; - - PROCEDURE place_order( - p_customer_id IN customers.customer_id%TYPE, - p_product_id IN products.product_id%TYPE, - p_quantity IN PLS_INTEGER, - p_order_id OUT order_id_t - ); -END order_api; -/ - -CREATE OR REPLACE PACKAGE BODY order_api AS - PROCEDURE place_order( - p_customer_id IN customers.customer_id%TYPE, - p_product_id IN products.product_id%TYPE, - p_quantity IN PLS_INTEGER, - p_order_id OUT order_id_t - ) IS - v_price products.unit_price%TYPE; - BEGIN - IF p_quantity > gc_max_items THEN - RAISE_APPLICATION_ERROR(-20100, - 'Quantity ' || p_quantity || ' exceeds limit of ' || gc_max_items); - END IF; - - SELECT unit_price INTO v_price - FROM products - WHERE product_id = p_product_id; - - INSERT INTO orders (customer_id, product_id, quantity, total) - VALUES (p_customer_id, p_product_id, p_quantity, v_price * p_quantity) - RETURNING order_id INTO p_order_id; - - EXCEPTION - WHEN NO_DATA_FOUND THEN - RAISE_APPLICATION_ERROR(-20101, - 'Product ' || p_product_id || ' not found'); - WHEN OTHERS THEN - log_pkg.error( - p_message => SQLERRM, - p_backtrace => DBMS_UTILITY.FORMAT_ERROR_BACKTRACE, - p_stack => DBMS_UTILITY.FORMAT_ERROR_STACK - ); - RAISE; - END place_order; -END order_api; -/ -``` - -```python -# Python: Call the procedure with connection pooling -import oracledb - -pool = oracledb.create_pool( - user="app_user", - password="secret", - dsn="host.example.com:1521/FREEPDB1", - min=2, - max=10, - increment=1, -) - -def place_order(customer_id: int, product_id: int, quantity: int) -> int: - with pool.acquire() as conn: - with conn.cursor() as cur: - order_id = cur.var(oracledb.NUMBER) - cur.callproc("order_api.place_order", [ - customer_id, product_id, quantity, order_id, - ]) - conn.commit() - return int(order_id.getvalue()) - - -# Usage -new_order_id = 
place_order(customer_id=42, product_id=101, quantity=5) -print(f"Created order: {new_order_id}") -``` - - - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[OCI C/C++ Integration](references/oci.md)** -- RAII handle management, array fetch/bind, Instant Client build hygiene. -- **[26ai Container Operations](references/containers.md)** -- Image selection, Podman run workflows, persistence strategy. -- **[AI Vector Search](references/vectors.md)** -- VECTOR data type, distance functions, IVF/HNSW indexes, RAG patterns. -- **[Oracle SQL Patterns](references/sql_patterns.md)** -- Analytics, CTEs, MERGE, MODEL clause, flashback queries. -- **[PL/SQL Development](references/plsql.md)** -- Package architecture, BULK COLLECT/FORALL, RESULT_CACHE, TAPI. -- **[JSON in Oracle](references/json.md)** -- JSON storage, SQL/JSON functions, Duality Views (23ai+). -- **[Connection Patterns](references/connections.md)** -- python-oracledb, JDBC, node-oracledb, DRCP, pool sizing. -- **[Oracle REST Data Services](references/ords.md)** -- AutoREST, custom REST APIs, OAuth2, PL/SQL gateway. -- **[Oracle Patterns for AI Agents](references/agent_patterns.md)** -- Schema discovery, safe DML, ORA- error catalog. -- **[Performance Tuning](references/performance.md)** -- EXPLAIN PLAN, DBMS_XPLAN, AWR, index strategies. -- **[SQL*Plus & SQLcl](references/sqlplus.md)** -- SQLcl features, Liquibase integration, MCP server. -- **[Database Security](references/security.md)** -- VPD, TDE, Unified Auditing, DBMS_REDACT. -- **[Core DBA Administration](references/admin.md)** -- User management, RMAN, Data Pump. -- **[Oracle Enterprise Manager](references/oem.md)** -- OEM Cloud Control, Performance Hub, SQL Monitor. -- **[Schema Migration & DevOps](references/schema_migrations.md)** -- Liquibase, Flyway, EBR, utPLSQL. 
- -## Official References - -- Oracle Call Interface Programmer's Guide (19c): -- Oracle Instant Client install/config docs: -- Oracle Database Free docs: -- Oracle SQL and datatype references: -- Oracle Database Free: -- Oracle Container Registry (database/free): -- Oracle Property Graph / 26ai Lite container quick start: -- Podman run reference: -- Podman secret-create reference: - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Oracle SQL*Plus](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/oracle_sqlplus.md) -- [Bash](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/bash.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/oracle/agents/openai.yaml b/plugins/flow/skills/oracle/agents/openai.yaml deleted file mode 100644 index 5db3d01..0000000 --- a/plugins/flow/skills/oracle/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Oracle Database" - short_description: "Oracle SQL, PL/SQL, drivers, ORDS, containers, vectors, and administration" diff --git a/plugins/flow/skills/oracle/references/admin.md b/plugins/flow/skills/oracle/references/admin.md deleted file mode 100644 index 6e3f86e..0000000 --- a/plugins/flow/skills/oracle/references/admin.md +++ /dev/null @@ -1,432 +0,0 @@ -# Core DBA Administration - -## Overview - -Use this reference for routine Oracle database administration: user and tablespace management, backup and recovery, undo and redo configuration, job scheduling, and data movement. These are the operations that keep an Oracle database running, recoverable, and organized. 
- -## User Management - -### Create Users - -```sql --- Basic user in a PDB -ALTER SESSION SET CONTAINER = FREEPDB1; - -CREATE USER app_user IDENTIFIED BY "SecurePass123!" - DEFAULT TABLESPACE app_data - TEMPORARY TABLESPACE temp - QUOTA 500M ON app_data; - -GRANT CREATE SESSION TO app_user; - --- Common user in CDB (prefix with C##) -CREATE USER C##admin_user IDENTIFIED BY "AdminPass123!" - CONTAINER = ALL; -``` - -### Password Profiles - -Profiles enforce password policies. Always assign a profile — the DEFAULT profile is too permissive for production. - -```sql -CREATE PROFILE app_profile LIMIT - PASSWORD_LIFE_TIME 90 - PASSWORD_REUSE_TIME 365 - PASSWORD_REUSE_MAX 12 - FAILED_LOGIN_ATTEMPTS 5 - PASSWORD_LOCK_TIME 1/24 -- lock for 1 hour - PASSWORD_GRACE_TIME 7 - PASSWORD_VERIFY_FUNCTION ora12c_verify_function; - -ALTER USER app_user PROFILE app_profile; -``` - -### Proxy Authentication - -Let a middle-tier connection pool authenticate as individual end users without knowing their passwords. This preserves audit identity. - -```sql --- Allow app_pool to connect as end_user -ALTER USER end_user GRANT CONNECT THROUGH app_pool; - --- Middle tier connects as: --- app_pool[end_user]/pool_password@db -``` - -### CDB vs PDB Users - -- **Common users** (`C##` prefix) exist in the root and all PDBs. Use for DBA accounts. -- **Local users** exist in a single PDB. Use for application accounts. -- Never create application accounts as common users. 
- -## Tablespace Management - -### Create Tablespaces - -```sql --- Standard smallfile tablespace -CREATE TABLESPACE app_data - DATAFILE '/opt/oracle/oradata/app_data01.dbf' SIZE 1G - AUTOEXTEND ON NEXT 256M MAXSIZE 10G; - --- Bigfile tablespace (single datafile, up to 128TB with 8K blocks) -CREATE BIGFILE TABLESPACE archive_data - DATAFILE '/opt/oracle/oradata/archive01.dbf' SIZE 10G - AUTOEXTEND ON NEXT 1G MAXSIZE UNLIMITED; -``` - -### When to Use Bigfile vs Smallfile - -- **Bigfile**: Fewer files to manage, simpler for very large tablespaces. Requires ASM or a filesystem that supports large files. Use for data warehouses and archive tablespaces. -- **Smallfile**: Default. Multiple datafiles spread across mount points. Better for OLTP where you want I/O spread across disks. - -### Monitor Space - -```sql --- Tablespace usage summary -SELECT tablespace_name, - ROUND(used_space * 8192 / 1024 / 1024) AS used_mb, - ROUND(tablespace_size * 8192 / 1024 / 1024) AS total_mb, - ROUND(used_percent, 1) AS pct_used -FROM DBA_TABLESPACE_USAGE_METRICS -ORDER BY used_percent DESC; - --- Datafile level detail -SELECT tablespace_name, file_name, - ROUND(bytes / 1024 / 1024) AS size_mb, - ROUND(maxbytes / 1024 / 1024) AS max_mb, - autoextensible -FROM DBA_DATA_FILES -ORDER BY tablespace_name; -``` - -### Add Space - -```sql --- Add a new datafile -ALTER TABLESPACE app_data - ADD DATAFILE '/opt/oracle/oradata/app_data02.dbf' SIZE 1G AUTOEXTEND ON; - --- Resize existing datafile -ALTER DATABASE DATAFILE '/opt/oracle/oradata/app_data01.dbf' RESIZE 2G; -``` - -## RMAN Backup and Recovery - -RMAN (Recovery Manager) is the only supported tool for Oracle database backup. Do not use OS-level file copies for production backups. - -### Architecture - -RMAN backs up datafiles, control files, archived redo logs, and the spfile. It tracks backup metadata in the control file and optionally in a recovery catalog database. 
- -### Common Backup Commands - -```bash -# Connect to RMAN -rman target / - -# Full database backup -RMAN> BACKUP DATABASE PLUS ARCHIVELOG; - -# Incremental level 0 (base) -RMAN> BACKUP INCREMENTAL LEVEL 0 DATABASE; - -# Incremental level 1 (changes since last level 0) -RMAN> BACKUP INCREMENTAL LEVEL 1 DATABASE; - -# Tablespace backup -RMAN> BACKUP TABLESPACE app_data; - -# Backup to a specific location -RMAN> BACKUP DATABASE FORMAT '/backup/rman/%d_%T_%s_%p.bkp'; -``` - -### Incremental Backup Strategy - -Use incremental backups to reduce backup time and storage. A common strategy: - -- **Sunday**: Level 0 (full base) -- **Daily**: Level 1 cumulative (all changes since last level 0) - -```text -RMAN> BACKUP INCREMENTAL LEVEL 1 CUMULATIVE DATABASE; -``` - -Cumulative level 1 backs up everything since the last level 0. Differential level 1 backs up only changes since the last level 1. Cumulative is simpler for recovery because you only need the level 0 + one level 1. - -### Restore and Recover - -```bash -# Complete recovery (database must be mounted, not open) -RMAN> RESTORE DATABASE; -RMAN> RECOVER DATABASE; -RMAN> ALTER DATABASE OPEN; - -# Point-in-time recovery -RMAN> RUN { - SET UNTIL TIME "TO_DATE('2026-03-25 14:00:00', 'YYYY-MM-DD HH24:MI:SS')"; - RESTORE DATABASE; - RECOVER DATABASE; -} -RMAN> ALTER DATABASE OPEN RESETLOGS; - -# Single tablespace recovery (database stays open) -RMAN> ALTER TABLESPACE app_data OFFLINE; -RMAN> RESTORE TABLESPACE app_data; -RMAN> RECOVER TABLESPACE app_data; -RMAN> ALTER TABLESPACE app_data ONLINE; -``` - -### Retention Policy - -```bash -# Keep backups for 30 days -RMAN> CONFIGURE RETENTION POLICY TO RECOVERY WINDOW OF 30 DAYS; - -# Delete obsolete backups -RMAN> DELETE OBSOLETE; - -# Report what would be deleted -RMAN> REPORT OBSOLETE; -``` - -## Undo Management - -Undo stores the before-image of data changes. It enables rollback, read consistency, and flashback queries. 
- -### Sizing - -Undersized undo causes `ORA-01555: snapshot too old`. Size undo to hold the longest-running query's read-consistency needs. - -```sql --- Check current undo usage -SELECT tablespace_name, status, SUM(bytes)/1024/1024 AS mb -FROM DBA_UNDO_EXTENTS -GROUP BY tablespace_name, status; - --- Calculate required undo size --- Formula: UNDO_SIZE = UNDO_RETENTION * UNDO_BLOCKS_PER_SEC * DB_BLOCK_SIZE -SELECT (ur * ups * bs) / 1024 / 1024 AS recommended_undo_mb -FROM ( - SELECT MAX(undoblks / ((end_time - begin_time) * 86400)) AS ups - FROM V$UNDOSTAT -), ( - SELECT value AS ur FROM V$PARAMETER WHERE name = 'undo_retention' -), ( - SELECT value AS bs FROM V$PARAMETER WHERE name = 'db_block_size' -); -``` - -### ORA-01555 Prevention - -- Increase `UNDO_RETENTION` (seconds) to match your longest query duration. -- Size the undo tablespace to support the retention period. -- Set `RETENTION GUARANTEE` if you cannot tolerate snapshot-too-old under any circumstances (this may cause DML to fail instead if undo space runs out). - -```sql -ALTER TABLESPACE undotbs1 RETENTION GUARANTEE; -ALTER SYSTEM SET UNDO_RETENTION = 3600; -- 1 hour -``` - -## Redo Log Management - -Redo logs record every change for crash recovery. Proper configuration prevents performance bottlenecks and data loss. - -### Enable ARCHIVELOG Mode - -Production databases must run in ARCHIVELOG mode. Without it, point-in-time recovery is impossible. - -```sql --- Check current mode -SELECT LOG_MODE FROM V$DATABASE; - --- Enable (requires restart) -SHUTDOWN IMMEDIATE; -STARTUP MOUNT; -ALTER DATABASE ARCHIVELOG; -ALTER DATABASE OPEN; -``` - -### Redo Log Sizing - -Undersized redo logs cause frequent log switches, which triggers excessive checkpointing. Target log switches every 15-20 minutes under peak load. 
- -```sql --- Check log switch frequency -SELECT TO_CHAR(first_time, 'YYYY-MM-DD HH24') AS hour, COUNT(*) AS switches -FROM V$LOG_HISTORY -WHERE first_time > SYSDATE - 1 -GROUP BY TO_CHAR(first_time, 'YYYY-MM-DD HH24') -ORDER BY 1; - --- Add larger redo log groups -ALTER DATABASE ADD LOGFILE GROUP 4 SIZE 1G; -ALTER DATABASE ADD LOGFILE GROUP 5 SIZE 1G; -ALTER DATABASE ADD LOGFILE GROUP 6 SIZE 1G; -``` - -### Multiplex Redo Logs - -Always maintain at least two members per group on separate disks. Losing all members of a redo group means data loss. - -```sql -ALTER DATABASE ADD LOGFILE MEMBER - '/disk2/redo/redo01b.log' TO GROUP 1; -``` - -## DBMS_SCHEDULER - -Use DBMS_SCHEDULER for recurring database jobs. It replaces the legacy DBMS_JOB package. - -### Simple Job - -```sql -BEGIN - DBMS_SCHEDULER.CREATE_JOB( - job_name => 'NIGHTLY_STATS', - job_type => 'PLSQL_BLOCK', - job_action => 'BEGIN DBMS_STATS.GATHER_SCHEMA_STATS(''HR'', OPTIONS => ''GATHER STALE''); END;', - start_date => SYSTIMESTAMP, - repeat_interval => 'FREQ=DAILY; BYHOUR=2; BYMINUTE=0', - enabled => TRUE, - comments => 'Gather stale stats for HR schema nightly at 2 AM' - ); -END; -/ -``` - -### Named Schedule and Program - -Separate the schedule from the action for reuse. 
- -```sql --- Define a reusable schedule -BEGIN - DBMS_SCHEDULER.CREATE_SCHEDULE( - schedule_name => 'WEEKDAY_MORNINGS', - repeat_interval => 'FREQ=WEEKLY; BYDAY=MON,TUE,WED,THU,FRI; BYHOUR=6', - comments => 'Every weekday at 6 AM' - ); -END; -/ - --- Define a reusable program -BEGIN - DBMS_SCHEDULER.CREATE_PROGRAM( - program_name => 'PURGE_OLD_LOGS', - program_type => 'PLSQL_BLOCK', - program_action => 'BEGIN DELETE FROM app_logs WHERE created_at < SYSDATE - 90; COMMIT; END;', - enabled => TRUE - ); -END; -/ - --- Combine them into a job -BEGIN - DBMS_SCHEDULER.CREATE_JOB( - job_name => 'DAILY_LOG_PURGE', - program_name => 'PURGE_OLD_LOGS', - schedule_name => 'WEEKDAY_MORNINGS', - enabled => TRUE - ); -END; -/ -``` - -### Monitor Jobs - -```sql --- Job run history -SELECT job_name, status, actual_start_date, run_duration, error# -FROM DBA_SCHEDULER_JOB_RUN_DETAILS -WHERE job_name = 'NIGHTLY_STATS' -ORDER BY actual_start_date DESC -FETCH FIRST 10 ROWS ONLY; - --- Currently running jobs -SELECT job_name, session_id, running_instance, elapsed_time -FROM DBA_SCHEDULER_RUNNING_JOBS; -``` - -## Data Pump (expdp / impdp) - -Data Pump is the standard tool for logical export and import. It replaces the legacy `exp`/`imp` utilities. - -### Directory Object Setup - -Data Pump reads from and writes to Oracle directory objects, not OS paths directly. 
- -```sql -CREATE OR REPLACE DIRECTORY dp_dir AS '/opt/oracle/datapump'; -GRANT READ, WRITE ON DIRECTORY dp_dir TO app_user; -``` - -### Export Patterns - -```bash -# Full schema export -expdp hr/password@FREEPDB1 \ - SCHEMAS=HR \ - DIRECTORY=dp_dir \ - DUMPFILE=hr_export_%U.dmp \ - LOGFILE=hr_export.log \ - PARALLEL=4 - -# Single table export -expdp hr/password@FREEPDB1 \ - TABLES=HR.EMPLOYEES \ - DIRECTORY=dp_dir \ - DUMPFILE=emp_export.dmp - -# Export with content filter -expdp hr/password@FREEPDB1 \ - TABLES=HR.ORDERS \ - QUERY="\"WHERE order_date > DATE '2026-01-01'\"" \ - DIRECTORY=dp_dir \ - DUMPFILE=recent_orders.dmp -``` - -### Import Patterns - -```bash -# Import into same schema -impdp hr/password@FREEPDB1 \ - SCHEMAS=HR \ - DIRECTORY=dp_dir \ - DUMPFILE=hr_export_%U.dmp \ - LOGFILE=hr_import.log \ - PARALLEL=4 - -# Remap schema (import HR data into HR_TEST) -impdp system/password@FREEPDB1 \ - REMAP_SCHEMA=HR:HR_TEST \ - DIRECTORY=dp_dir \ - DUMPFILE=hr_export_%U.dmp - -# Remap tablespace -impdp system/password@FREEPDB1 \ - REMAP_TABLESPACE=HR_DATA:TEST_DATA \ - DIRECTORY=dp_dir \ - DUMPFILE=hr_export_%U.dmp - -# Table exists action -impdp hr/password@FREEPDB1 \ - TABLES=HR.EMPLOYEES \ - TABLE_EXISTS_ACTION=REPLACE \ - DIRECTORY=dp_dir \ - DUMPFILE=emp_export.dmp -``` - -### Data Pump Tips - -- Use `%U` in DUMPFILE names with PARALLEL to create multiple dump files. -- Use `EXCLUDE` and `INCLUDE` to filter object types (e.g., `EXCLUDE=INDEX` to skip indexes). -- Use `CONTENT=DATA_ONLY` or `CONTENT=METADATA_ONLY` to split exports. -- Monitor running jobs with `expdp ATTACH` or `SELECT * FROM DBA_DATAPUMP_JOBS`. 
- -## Learn More (Official) - -- Oracle Database Administrator's Guide: -- RMAN Backup and Recovery Guide: -- Data Pump Utilities Guide: -- DBMS_SCHEDULER Reference: diff --git a/plugins/flow/skills/oracle/references/agent_patterns.md b/plugins/flow/skills/oracle/references/agent_patterns.md deleted file mode 100644 index 3d2d172..0000000 --- a/plugins/flow/skills/oracle/references/agent_patterns.md +++ /dev/null @@ -1,304 +0,0 @@ -# Oracle Patterns for AI Agents - -## Overview - -Use this reference when building or operating as an AI agent that interacts with Oracle Database. Covers schema discovery, safe DML patterns, common error diagnosis, idempotent DDL, and transaction safety. Every pattern is designed to be defensive — agents must never cause data loss or leave transactions in an ambiguous state. - ---- - -## Schema Discovery Queries - -Run these queries at the start of a session to understand the database structure before writing any DML. - -### Startup Introspection Sequence - -```sql --- 1. List schemas with objects (skip Oracle internal schemas). -SELECT username FROM all_users - WHERE oracle_maintained = 'N' - ORDER BY username; - --- 2. List tables in the target schema. -SELECT table_name, num_rows, last_analyzed - FROM all_tables - WHERE owner = :schema_name - ORDER BY table_name; - --- 3. List columns with types and nullability. -SELECT column_name, data_type, data_length, data_precision, data_scale, - nullable, data_default - FROM all_tab_columns - WHERE owner = :schema_name AND table_name = :table_name - ORDER BY column_id; - --- 4. List primary keys and unique constraints. -SELECT acc.constraint_name, acc.column_name, ac.constraint_type - FROM all_cons_columns acc - JOIN all_constraints ac - ON ac.owner = acc.owner - AND ac.constraint_name = acc.constraint_name - WHERE acc.owner = :schema_name AND acc.table_name = :table_name - AND ac.constraint_type IN ('P', 'U') - ORDER BY acc.constraint_name, acc.position; - --- 5. 
List foreign keys (to understand relationships). -SELECT ac.constraint_name, - acc.column_name AS fk_column, - ac.r_constraint_name, - rac.table_name AS referenced_table, - racc.column_name AS referenced_column - FROM all_constraints ac - JOIN all_cons_columns acc - ON acc.owner = ac.owner AND acc.constraint_name = ac.constraint_name - JOIN all_constraints rac - ON rac.owner = ac.r_owner AND rac.constraint_name = ac.r_constraint_name - JOIN all_cons_columns racc - ON racc.owner = rac.owner AND racc.constraint_name = rac.constraint_name - AND racc.position = acc.position - WHERE ac.owner = :schema_name AND ac.table_name = :table_name - AND ac.constraint_type = 'R' - ORDER BY ac.constraint_name, acc.position; - --- 6. List indexes (for query planning awareness). -SELECT index_name, index_type, uniqueness, - LISTAGG(column_name, ', ') WITHIN GROUP (ORDER BY column_position) AS columns - FROM all_ind_columns aic - JOIN all_indexes ai USING (owner, index_name, table_name) - WHERE aic.table_owner = :schema_name AND aic.table_name = :table_name - GROUP BY index_name, index_type, uniqueness - ORDER BY index_name; -``` - -**Why introspect first:** agents that skip discovery risk writing queries against wrong column names, violating constraints, or missing indexes that change optimal query shape. - ---- - -## Safe DML Patterns - -### Count Before Delete - -```sql --- Always check the row count before executing a DELETE. --- This prevents accidentally deleting more rows than intended. -SELECT COUNT(*) FROM orders WHERE status = 'CANCELLED' AND order_date < ADD_MONTHS(SYSDATE, -12); --- Review the count. Only proceed if it matches expectations. -DELETE FROM orders WHERE status = 'CANCELLED' AND order_date < ADD_MONTHS(SYSDATE, -12); -``` - -### SAVEPOINT Dry Runs - -```sql --- Execute the DML, inspect the result, then roll back if it looks wrong. --- This is the safest pattern for agents that need to verify before committing. 
-SAVEPOINT before_update; - -UPDATE customers SET tier = 'GOLD' WHERE lifetime_spend > 50000; --- Check: how many rows were affected? --- DBMS_OUTPUT.PUT_LINE(SQL%ROWCOUNT || ' rows updated'); - --- If the count is unexpected: -ROLLBACK TO before_update; - --- If the count is correct: -COMMIT; -``` - -### FETCH FIRST Guards - -```sql --- Limit destructive operations to a bounded number of rows (DELETE has no row-limiting clause, so bound it through a ROWID subquery). --- This prevents runaway deletes if a WHERE clause is broader than expected. -DELETE FROM audit_log - WHERE rowid IN (SELECT rowid FROM audit_log WHERE created_at < ADD_MONTHS(SYSDATE, -24) - FETCH FIRST 10000 ROWS ONLY); - --- Repeat in a loop until zero rows are deleted. --- Committing per batch avoids undo segment exhaustion. -``` - -### Batch Deletes with ROWNUM - -```sql --- Pre-12c alternative to FETCH FIRST. --- Delete in batches of 5000 to keep undo and redo manageable. -DECLARE - v_deleted NUMBER; -BEGIN - LOOP - DELETE FROM audit_log - WHERE created_at < ADD_MONTHS(SYSDATE, -24) - AND ROWNUM <= 5000; - v_deleted := SQL%ROWCOUNT; - COMMIT; - EXIT WHEN v_deleted = 0; - END LOOP; -END; -/ -``` - ---- - -## Top 25 ORA- Error Catalog - -| ORA Code | Name | Root Cause | Corrective Action | -|------------|-----------------------------|---------------------------------------------------------|----------------------------------------------------------------| -| ORA-00001 | Unique constraint violated | INSERT/UPDATE creates a duplicate key | Check existing data; use MERGE for upsert logic | -| ORA-00054 | Resource busy | Table/row locked by another session | Retry after delay; use NOWAIT or SKIP LOCKED to detect | -| ORA-00060 | Deadlock detected | Two sessions hold locks the other needs | Retry the transaction; reorder DML to acquire locks consistently | -| ORA-00904 | Invalid identifier | Column name typo or missing alias | Verify column exists in ALL_TAB_COLUMNS | -| ORA-00907 | Missing right parenthesis | Syntax error in SQL | Check for mismatched parens, missing commas, bad keywords | -| ORA-00913 | Too many
values | INSERT has more values than columns | Match column list to VALUES list explicitly | -| ORA-00923 | FROM keyword not found | Missing FROM or syntax error before it | Review SELECT clause for missing commas or aliases | -| ORA-00936 | Missing expression | Incomplete clause (e.g., trailing comma in SELECT) | Remove trailing comma; complete the expression | -| ORA-00942 | Table or view does not exist | Wrong schema, missing synonym, or no privilege | Check ALL_TABLES; qualify with schema prefix; grant SELECT | -| ORA-00955 | Name already used | CREATE TABLE/INDEX on existing name | Use IF NOT EXISTS or check USER_OBJECTS first | -| ORA-01400 | Cannot insert NULL | NOT NULL column missing from INSERT | Provide a value or set a DEFAULT on the column | -| ORA-01422 | Exact fetch returns more | SELECT INTO returns multiple rows | Add WHERE conditions or use FETCH FIRST 1 ROW ONLY | -| ORA-01438 | Value too large for column | Numeric precision exceeded | Check NUMBER(p,s) definition; cast or round the value | -| ORA-01476 | Divisor is equal to zero | Division by zero in SQL or PL/SQL | Add NULLIF(denominator, 0) or NVL2 guard | -| ORA-01489 | Result too long for string | LISTAGG or concat exceeds 4000 bytes | Use ON OVERFLOW TRUNCATE (12c+) or XMLAGG alternative | -| ORA-01722 | Invalid number | Implicit conversion failed (string → number) | Use TO_NUMBER with explicit format; clean source data | -| ORA-01830 | Date format too long | Date format model mismatch | Use explicit TO_DATE/TO_TIMESTAMP with format mask | -| ORA-01843 | Not a valid month | NLS_DATE_LANGUAGE mismatch or bad date string | Set NLS explicitly; validate date strings before conversion | -| ORA-02291 | Integrity constraint - parent key not found | FK violation on INSERT/UPDATE | Verify parent row exists; insert parent first | -| ORA-02292 | Integrity constraint - child record found | FK violation on DELETE parent | Delete children first or use ON DELETE CASCADE | -| ORA-04091 | Mutating table | 
Trigger reads/writes its own table | Use compound trigger or move logic to AFTER STATEMENT section | -| ORA-06502 | PL/SQL numeric or value error | String too long for variable, or bad conversion | Check VARCHAR2 size; use SUBSTR to truncate safely | -| ORA-06512 | PL/SQL backtrace line | Stack frame in error trace (not the error itself) | Read the preceding ORA- code; this line gives the location | -| ORA-12154 | TNS: could not resolve | Service name not in tnsnames.ora or DNS | Verify TNS_ADMIN path, service name spelling, listener status | -| ORA-12541 | TNS: no listener | Listener not running or wrong host/port | Check `lsnrctl status`; verify host and port in connect string | - ---- - -## Idempotent DDL Patterns - -Agents must produce DDL that succeeds whether the object exists or not. - -### CREATE OR REPLACE (Views, Packages, Functions, Triggers) - -```sql --- These object types support CREATE OR REPLACE natively. -CREATE OR REPLACE VIEW active_customers_v AS - SELECT * FROM customers WHERE status = 'ACTIVE'; -``` - -### Existence Checks for Tables and Indexes - -```sql --- Oracle does not support IF NOT EXISTS for CREATE TABLE (pre-23ai). --- Use a PL/SQL block to check first. -DECLARE - v_exists NUMBER; -BEGIN - SELECT COUNT(*) INTO v_exists - FROM user_tables WHERE table_name = 'AUDIT_LOG'; - IF v_exists = 0 THEN - EXECUTE IMMEDIATE ' - CREATE TABLE audit_log ( - log_id NUMBER GENERATED ALWAYS AS IDENTITY, - log_time TIMESTAMP DEFAULT SYSTIMESTAMP, - severity VARCHAR2(10), - message VARCHAR2(4000) - )'; - END IF; -END; -/ - --- 23ai adds native IF NOT EXISTS support. 
-CREATE TABLE IF NOT EXISTS audit_log ( - log_id NUMBER GENERATED ALWAYS AS IDENTITY, - log_time TIMESTAMP DEFAULT SYSTIMESTAMP, - severity VARCHAR2(10), - message VARCHAR2(4000) -); -- 23ai only -``` - -### Idempotent Index Creation - -```sql -DECLARE - v_exists NUMBER; -BEGIN - SELECT COUNT(*) INTO v_exists - FROM user_indexes WHERE index_name = 'IDX_ORDERS_STATUS'; - IF v_exists = 0 THEN - EXECUTE IMMEDIATE 'CREATE INDEX idx_orders_status ON orders (status)'; - END IF; -END; -/ -``` - -### Idempotent Column Addition - -```sql -DECLARE - v_exists NUMBER; -BEGIN - SELECT COUNT(*) INTO v_exists - FROM user_tab_columns - WHERE table_name = 'ORDERS' AND column_name = 'PRIORITY'; - IF v_exists = 0 THEN - EXECUTE IMMEDIATE 'ALTER TABLE orders ADD (priority VARCHAR2(10) DEFAULT ''NORMAL'')'; - END IF; -END; -/ -``` - ---- - -## Transaction Safety - -### SAVEPOINT / ROLLBACK TO - -```sql --- Use SAVEPOINTs to create rollback points within a transaction. --- This lets you undo part of a multi-step operation without losing earlier work. -SAVEPOINT step_1_complete; - --- Step 2: risky operation -UPDATE inventory SET qty = qty - :order_qty WHERE product_id = :pid; - --- If step 2 fails or produces unexpected results: -ROLLBACK TO step_1_complete; --- Step 1's changes are still intact. -``` - -### Autonomous Transaction Isolation - -```sql --- Use autonomous transactions to log diagnostic information that persists --- even if the main transaction rolls back. --- NEVER use autonomous transactions for business data DML. -CREATE OR REPLACE PROCEDURE agent_log(p_action VARCHAR2, p_detail VARCHAR2) IS - PRAGMA AUTONOMOUS_TRANSACTION; -BEGIN - INSERT INTO agent_audit_log (action, detail, logged_at) - VALUES (p_action, p_detail, SYSTIMESTAMP); - COMMIT; -END; -/ -``` - -### Agent Transaction Rules - -1. **Never leave transactions open.** Always COMMIT or ROLLBACK before returning control. -2. **Use SAVEPOINT before every DML block** so you can roll back to a known-good state. -3. 
**Check SQL%ROWCOUNT after every DML statement.** If the affected row count is unexpected, ROLLBACK TO the savepoint. -4. **Prefer SELECT FOR UPDATE NOWAIT** when you need to lock rows — it fails immediately instead of blocking indefinitely. -5. **Set statement-level timeouts** when available to prevent runaway queries. - -```sql --- Lock rows explicitly with NOWAIT to detect contention immediately. -SELECT * FROM orders - WHERE order_id = :oid - FOR UPDATE NOWAIT; -- raises ORA-00054 if locked instead of waiting -``` - ---- - -## Official References - -- Oracle Database SQL Language Reference: -- Oracle Error Messages Reference: -- Oracle Database Concepts (Transaction Management): -- Oracle Database Reference (Data Dictionary Views): diff --git a/plugins/flow/skills/oracle/references/connections.md b/plugins/flow/skills/oracle/references/connections.md deleted file mode 100644 index 7541aea..0000000 --- a/plugins/flow/skills/oracle/references/connections.md +++ /dev/null @@ -1,301 +0,0 @@ -# Connection Patterns - -## Overview - -Use this reference when connecting to Oracle Database from application code. Covers the three most common drivers (Python, Java, Node.js), connection pooling strategies, and connection string configuration. The OCI C/C++ driver is documented separately in [oci.md](oci.md). - ---- - -## python-oracledb - -python-oracledb is the successor to cx_Oracle. It ships in two modes: **thin** (pure Python, zero native dependencies) and **thick** (loads Oracle Client libraries for advanced features like Advanced Queuing and Kerberos). - -### Thin Mode (Default) - -```python -import oracledb - -# Thin mode — no Instant Client required. -# Use this unless you need thick-only features. -conn = oracledb.connect( - user="app_user", - password="secret", - dsn="host.example.com:1521/FREEPDB1" -) - -with conn.cursor() as cur: - # Always use bind variables to prevent SQL injection - # and enable cursor sharing in Oracle's shared pool. 
- cur.execute("SELECT * FROM orders WHERE customer_id = :cid", {"cid": 42}) - rows = cur.fetchall() -``` - -### Thick Mode - -```python -# Enable thick mode before creating any connections. -# Point to the Instant Client directory if it is not on LD_LIBRARY_PATH. -oracledb.init_oracle_client(lib_dir="/opt/oracle/instantclient_23_7") -``` - -### Connection Pooling - -```python -# Create a pool at application startup; reuse it for the lifetime of the process. -# min/max/increment control how the pool grows and shrinks. -pool = oracledb.create_pool( - user="app_user", - password="secret", - dsn="host.example.com:1521/FREEPDB1", - min=2, - max=10, - increment=1 -) - -# Acquire/release with context manager. -with pool.acquire() as conn: - with conn.cursor() as cur: - cur.execute("SELECT SYSDATE FROM dual") -``` - -### Async Support (Thin Mode Only) - -```python -import oracledb -import asyncio - -async def main(): - pool = oracledb.create_pool_async( - user="app_user", password="secret", - dsn="host.example.com:1521/FREEPDB1", - min=2, max=10 - ) - async with pool.acquire() as conn: - async with conn.cursor() as cur: - await cur.execute("SELECT * FROM employees WHERE department_id = :d", {"d": 10}) - rows = await cur.fetchall() - -asyncio.run(main()) -``` - -### Wallet / mTLS Connections - -```python -# For Autonomous Database or any mTLS-configured instance. -# Unzip the wallet to a directory and point dsn to the TNS alias. -conn = oracledb.connect( - user="admin", - password="secret", - dsn="mydb_high", # TNS alias from tnsnames.ora in wallet - config_dir="/path/to/wallet", # directory containing tnsnames.ora + cwallet.sso - wallet_location="/path/to/wallet", - wallet_password=None # None if using auto-login wallet (cwallet.sso) -) -``` - -### LOB Handling - -```python -# By default, LOBs are returned as LOB locators (streamed reads). -# For small LOBs, fetch as bytes/string directly to avoid round-trips. 
-oracledb.defaults.fetch_lobs = False # returns CLOB as str, BLOB as bytes -``` - ---- - -## JDBC Thin Driver - -### Basic Connection with UCP - -```java -import oracle.ucp.jdbc.PoolDataSource; -import oracle.ucp.jdbc.PoolDataSourceFactory; - -// Universal Connection Pool (UCP) is Oracle's application-side pool. -// Prefer UCP over HikariCP when using Oracle-specific features -// (Fast Connection Failover, Transaction Affinity, runtime load balancing). -PoolDataSource pds = PoolDataSourceFactory.getPoolDataSource(); -pds.setConnectionFactoryClassName("oracle.jdbc.pool.OracleDataSource"); -pds.setURL("jdbc:oracle:thin:@//host.example.com:1521/FREEPDB1"); -pds.setUser("app_user"); -pds.setPassword("secret"); -pds.setMinPoolSize(2); -pds.setMaxPoolSize(20); -pds.setInitialPoolSize(5); - -try (Connection conn = pds.getConnection(); - PreparedStatement ps = conn.prepareStatement( - "SELECT * FROM orders WHERE status = ? AND region = ?")) { - ps.setString(1, "SHIPPED"); - ps.setString(2, "US-WEST"); - try (ResultSet rs = ps.executeQuery()) { - while (rs.next()) { - // process row - } - } -} -``` - -### Array Binding (Batch Insert) - -```java -// Array binding sends multiple rows in a single round-trip. -// Set the batch size to match your data volume — 100-1000 is typical. 
-try (PreparedStatement ps = conn.prepareStatement( - "INSERT INTO audit_log (event_type, payload) VALUES (?, ?)")) { - for (AuditEvent event : events) { - ps.setString(1, event.type()); - ps.setString(2, event.payload()); - ps.addBatch(); - } - ps.executeBatch(); -} -``` - -### Spring Boot Integration - -```yaml -# application.yml — Spring Boot with UCP -spring: - datasource: - url: jdbc:oracle:thin:@//host.example.com:1521/FREEPDB1 - username: app_user - password: secret - driver-class-name: oracle.jdbc.OracleDriver - type: oracle.ucp.jdbc.PoolDataSourceImpl - oracleucp: - min-pool-size: 2 - max-pool-size: 20 - initial-pool-size: 5 - connection-wait-timeout: 3 -``` - ---- - -## node-oracledb - -### Connection Pool with Async/Await - -```javascript -const oracledb = require("oracledb"); - -// Thin mode is the default since node-oracledb 6.0. -// No native compilation or Instant Client required. -async function init() { - // Create pool once at startup. - await oracledb.createPool({ - user: "app_user", - password: "secret", - connectString: "host.example.com:1521/FREEPDB1", - poolMin: 2, - poolMax: 10, - poolIncrement: 1, - }); -} - -async function getOrders(customerId) { - let conn; - try { - conn = await oracledb.getConnection(); // acquires from default pool - const result = await conn.execute( - "SELECT order_id, total FROM orders WHERE customer_id = :cid", - { cid: customerId }, - { outFormat: oracledb.OUT_FORMAT_OBJECT } - ); - return result.rows; - } finally { - if (conn) await conn.close(); // returns to pool - } -} -``` - -### Result Sets for Large Queries - -```javascript -// Use result sets to stream large results without buffering everything in memory. 
-const result = await conn.execute( - "SELECT * FROM large_table", - [], - { resultSet: true } -); - -let row; -while ((row = await result.resultSet.getRow())) { - process(row); -} -await result.resultSet.close(); -``` - ---- - -## Connection Pooling Deep Dive - -### Application-Side Pools - -| Driver | Pool Technology | Key Config | -|----------------|----------------------------|-----------------------------------------| -| python-oracledb | Built-in `create_pool` | `min`, `max`, `increment` | -| JDBC | UCP (Universal Connection Pool) | `MinPoolSize`, `MaxPoolSize`, `InitialPoolSize` | -| node-oracledb | Built-in pool | `poolMin`, `poolMax`, `poolIncrement` | - -### DRCP (Database Resident Connection Pooling) - -DRCP pools server processes on the database side. Use it when you have many application instances (hundreds of microservices) but each holds connections mostly idle. - -```sql --- Enable DRCP on the database. -EXEC DBMS_CONNECTION_POOL.START_POOL(); - --- Clients connect by appending :POOLED to the service name. --- python-oracledb example: -conn = oracledb.connect( - user="app_user", password="secret", - dsn="host.example.com:1521/FREEPDB1:POOLED" -) -``` - -### Pool Sizing Formula - -```text -max_pool_size = ceil(peak_concurrent_requests / avg_sql_executions_per_request) -``` - -Start conservative (2-5 per CPU core on the app server) and increase only when you observe connection-wait timeouts. Oversized pools waste database server processes and memory. 
- ---- - -## Connection String Formats - -### Easy Connect Plus - -```text -# Basic -host:port/service_name - -# With server type (dedicated/shared/pooled) -host:port/service_name:pooled - -# With failover list (19c+) -(DESCRIPTION=(CONNECT_TIMEOUT=5)(RETRY_COUNT=3)(ADDRESS_LIST=(ADDRESS=(HOST=primary)(PORT=1521))(ADDRESS=(HOST=standby)(PORT=1521)))(CONNECT_DATA=(SERVICE_NAME=myservice))) -``` - -### TNS_ADMIN and Wallet - -Set `TNS_ADMIN` to a directory containing `tnsnames.ora` and optionally wallet files (`cwallet.sso`, `ewallet.p12`, `sqlnet.ora`). - -```bash -export TNS_ADMIN=/etc/oracle/network -# tnsnames.ora defines named connection aliases: -# mydb_high = (DESCRIPTION=(...)) -``` - ---- - -## Official References - -- python-oracledb documentation: -- Oracle JDBC Developer's Guide: -- Oracle UCP Developer's Guide: -- node-oracledb documentation: -- DRCP documentation: -- Oracle Net Services Reference (tnsnames.ora): diff --git a/plugins/flow/skills/oracle/references/containers.md b/plugins/flow/skills/oracle/references/containers.md deleted file mode 100644 index b941775..0000000 --- a/plugins/flow/skills/oracle/references/containers.md +++ /dev/null @@ -1,82 +0,0 @@ -# Oracle 26ai Container - -## Overview - -Use this reference to choose the right Oracle 26ai image and run repeatable Podman workflows for dev/test environments. - -## Choose Image Flavor - -- Use Full image (`container-registry.oracle.com/database/free:latest`) when advanced features are required. -- Use Lite image (`container-registry.oracle.com/database/free:latest-lite`) when faster pull/start and smaller footprint are preferred (for example CI smoke tests). -- Expect Lite to exclude a set of advanced features; do not assume feature parity with Full.
- -## Start Containers - -```bash -# Full image -podman run -d --name oracle26 container-registry.oracle.com/database/free:latest - -# Lite image -podman run -d --name oracle26lite container-registry.oracle.com/database/free:latest-lite -``` - -- Wait for `podman ps` to report status `healthy` before connecting clients. -- Expect random SYS/SYSTEM/PDBADMIN passwords when no password input is provided. -- For reproducible CI, pin explicit image tags or digests instead of relying only on mutable `latest` tags. - -## Configure Passwords - -- Prefer `--secret` inputs on Podman for sensitive credentials. -- Use `-e ORACLE_PWD=` only for local, low-risk workflows. -- Rotate account passwords after startup when needed: - -```bash -podman exec oracle26 ./setPassword.sh NEW_PASSWORD -``` - -## Configure Persistence - -- Mount data at `/opt/oracle/oradata` to persist DB state across container recreation. -- Prefer named Podman volumes for fast startup from prebuilt datafiles. -- Expect first-time initialization to take significantly longer when mounting an empty host directory. -- Ensure mounted host paths are writable by container uid `54321` (`oracle` user in container). - -## Connect and Verify - -- Publish listener port `1521` with `-p HOST_PORT:1521` or `-P`. -- Use service `FREE` for CDB root and `FREEPDB1` for default PDB. -- Connect from inside container: - -```bash -podman exec -it oracle26 sqlplus system/PASSWORD@FREE -podman exec -it oracle26 sqlplus pdbadmin/PASSWORD@FREEPDB1 -``` - -## Run Post-Setup and Startup Scripts - -- Mount setup scripts to `/opt/oracle/scripts/setup`. -- Mount startup scripts to `/opt/oracle/scripts/startup`. -- Use `.sql` or `.sh` files and prefix with numeric ordering (`01_`, `02_`, ...). -- Expect setup scripts to run only during fresh database creation; prebuilt DB startup alone does not re-run setup scripts. - -## Handle Full vs Lite Differences - -- `ORACLE_PWD` is available across Full and Lite images.
-- Use Full-specific env toggles (`ORACLE_CHARACTERSET`, `ENABLE_ARCHIVELOG`, `ENABLE_FORCE_LOGGING`) only when Full image behavior is required. -- Use Lite-specific options (for example `ORACLE_PDB`) only when running Lite image workflows. -- Avoid Lite for scenarios that need Oracle True Cache or other excluded advanced components. - -## Use Recommended Defaults - -- Use Lite + ephemeral storage for CI validation and adapter smoke tests. -- Use Full + persisted volume + explicit port mapping for feature validation and deeper local debugging. -- Gate tests that depend on advanced Oracle features so Lite-based CI jobs skip them explicitly. - -## Official References - -- -- -- -- -- -- diff --git a/plugins/flow/skills/oracle/references/json.md b/plugins/flow/skills/oracle/references/json.md deleted file mode 100644 index 11acb87..0000000 --- a/plugins/flow/skills/oracle/references/json.md +++ /dev/null @@ -1,242 +0,0 @@ -# JSON in Oracle - -## Overview - -Use this reference when storing, querying, generating, or indexing JSON data in Oracle Database. Oracle's JSON support has evolved significantly across versions — this guide annotates every feature with the version that introduced it so you can target the right capabilities for your environment. - ---- - -## JSON Storage Options - -### VARCHAR2 / CLOB (12c+) - -```sql --- Prior to 21c, store JSON in a VARCHAR2 (up to 32767 bytes) or CLOB column. --- Add an IS JSON check constraint so the optimizer knows the column contains JSON --- and can apply JSON-specific optimizations. -CREATE TABLE events ( - event_id NUMBER GENERATED ALWAYS AS IDENTITY, - payload CLOB, - CONSTRAINT events_payload_json CHECK (payload IS JSON) -); -``` - -**Why the constraint matters:** without IS JSON, functions like JSON_VALUE still work, but the optimizer cannot use JSON search indexes and some query rewrites are blocked. - -### Native JSON Type (21c+) - -```sql --- The native JSON type stores a binary-parsed representation (OSON format). 
--- Queries skip text parsing entirely, which is significantly faster for --- complex documents. Use this when running 21c or later. -CREATE TABLE events ( - event_id NUMBER GENERATED ALWAYS AS IDENTITY, - payload JSON -); -``` - ---- - -## Dot Notation Access - -```sql --- Dot notation provides a concise path syntax for simple lookups. --- The column must have an IS JSON constraint or be of type JSON. --- Returns VARCHAR2 by default; returns NULL for missing paths. -SELECT e.payload.customer.name, - e.payload.customer.address.city - FROM events e - WHERE e.payload.customer.tier = 'gold'; -``` - -**Limitation:** dot notation cannot express array element access, type casts, or error handling. Use SQL/JSON functions for those cases. - ---- - -## SQL/JSON Functions - -### JSON_VALUE (12c+) - -Extract a scalar value from a JSON document. Returns NULL on missing path by default. - -```sql --- Extract a scalar and cast to a specific SQL type. -SELECT JSON_VALUE(payload, '$.order.total' RETURNING NUMBER) AS order_total, - JSON_VALUE(payload, '$.order.placed_at' RETURNING TIMESTAMP) AS placed_at - FROM events - WHERE JSON_VALUE(payload, '$.order.status') = 'shipped'; - --- Use ERROR ON ERROR to surface malformed data instead of silently returning NULL. -SELECT JSON_VALUE(payload, '$.order.total' RETURNING NUMBER ERROR ON ERROR) - FROM events; -``` - -### JSON_QUERY (12c+) - -Extract a JSON object or array (as opposed to a scalar). - -```sql --- Return the nested "items" array as a JSON fragment. -SELECT JSON_QUERY(payload, '$.order.items' WITH WRAPPER) AS items_json - FROM events - WHERE event_id = 42; -``` - -### JSON_EXISTS (12c+) - -Test whether a path exists. Use it in WHERE clauses for filtering. - -```sql --- Find events that have a discount applied. -SELECT event_id - FROM events - WHERE JSON_EXISTS(payload, '$.order.discount'); -``` - -### JSON_TABLE (12c+) - -Project JSON into relational rows and columns. 
This is the most powerful function — use it when you need to join JSON data with relational tables or unnest arrays. - -```sql --- Unnest an array of line items into relational rows. -SELECT e.event_id, jt.* - FROM events e, - JSON_TABLE(e.payload, '$.order.items[*]' - COLUMNS ( - line_num FOR ORDINALITY, - product_id VARCHAR2(50) PATH '$.sku', - quantity NUMBER PATH '$.qty', - price NUMBER PATH '$.unit_price' - ) - ) jt; -``` - ---- - -## JSON Duality Views (23ai+) - -Duality views let you expose relational tables as JSON documents and vice versa. Reads return JSON; writes accept JSON and Oracle decomposes them into relational DML automatically. - -```sql --- Create a duality view over normalized order/line-item tables. -CREATE JSON RELATIONAL DUALITY VIEW order_dv AS - orders @insert @update @delete { - _id : order_id, - customer : customer_id, - order_date : order_date, - items : order_items @insert @update @delete [ - { - line_id : item_id, - product : product_id, - quantity : qty, - unit_price : price - } - ] - }; - --- Read as JSON. -SELECT * FROM order_dv WHERE JSON_VALUE(data, '$._id') = 1001; - --- Insert via JSON. -INSERT INTO order_dv VALUES ('{"customer": 42, "order_date": "2026-03-26", "items": [{"product": "X100", "quantity": 2, "unit_price": 29.99}]}'); -``` - -**Why duality views:** they eliminate the impedance mismatch between document APIs and relational storage. The relational model handles integrity; the JSON interface handles developer ergonomics. Optimistic locking via ETags is built in. - ---- - -## JSON Indexing - -### Functional Indexes (12c+) - -```sql --- Index a specific scalar path for equality/range lookups. -CREATE INDEX idx_events_status ON events ( - JSON_VALUE(payload, '$.order.status' RETURNING VARCHAR2(30) ERROR ON ERROR) -); -``` - -### JSON Search Index (12c+) - -```sql --- A full-text JSON search index indexes all paths in the document. 
--- Use it when queries access many different paths and you cannot predict --- which paths will be filtered. -CREATE SEARCH INDEX idx_events_search ON events (payload) FOR JSON; -``` - -### Multivalue Index (21c+) - -```sql --- Index values inside JSON arrays so that containment checks are fast. --- Without this, array element lookups require full scans. -CREATE MULTIVALUE INDEX idx_events_tags ON events e ( - e.payload.tags.type() -); -``` - ---- - -## JSON Generation - -Build JSON from relational data for API responses, data export, or inter-system messaging. - -### JSON_OBJECT / JSON_ARRAY (12c+) - -```sql --- Build a JSON object from columns. -SELECT JSON_OBJECT( - KEY 'id' VALUE employee_id, - KEY 'name' VALUE full_name, - KEY 'dept' VALUE department_id - ABSENT ON NULL -- omit keys with NULL values - ) AS emp_json - FROM employees - WHERE department_id = 10; -``` - -### JSON_OBJECTAGG / JSON_ARRAYAGG (12c+) - -```sql --- Aggregate rows into a JSON array of objects. -SELECT JSON_ARRAYAGG( - JSON_OBJECT( - KEY 'id' VALUE employee_id, - KEY 'name' VALUE full_name - ) - ORDER BY full_name - RETURNING CLOB - ) AS team_json - FROM employees - WHERE department_id = 10; - --- Aggregate key-value pairs into a single JSON object. 
-SELECT JSON_OBJECTAGG(KEY param_name VALUE param_value) AS config_json - FROM system_params - WHERE category = 'email'; -``` - ---- - -## Version Compatibility Summary - -| Feature | Minimum Version | -|---------------------------|-----------------| -| IS JSON constraint | 12.1.0.2 | -| JSON_VALUE / JSON_QUERY | 12.1.0.2 | -| JSON_TABLE | 12.1.0.2 | -| JSON_EXISTS | 12.1.0.2 | -| JSON generation functions | 12.2 | -| JSON search index | 12.2 | -| Native JSON type (OSON) | 21c | -| Multivalue index | 21c | -| JSON Duality Views | 23ai | -| JSON Schema validation | 23ai | - ---- - -## Official References - -- Oracle JSON Developer's Guide (19c): -- Oracle JSON Developer's Guide (23ai): -- SQL/JSON Path Expressions: diff --git a/plugins/flow/skills/oracle/references/oci.md b/plugins/flow/skills/oracle/references/oci.md deleted file mode 100644 index 4c9c9a6..0000000 --- a/plugins/flow/skills/oracle/references/oci.md +++ /dev/null @@ -1,47 +0,0 @@ -# Oracle OCI Best Practices - -## Overview - -Use this reference when working with OCI-based data paths (connect, execute, fetch, bind, transaction control) and when configuring Oracle Instant Client across local dev and CI. - -## OCI implementation rules - -1. Wrap OCI handles in RAII-style owners to guarantee cleanup. -2. Check OCI return codes immediately and include context in diagnostics. -3. Keep NLS/session settings explicit instead of relying on ambient defaults. -4. Reuse prepared statements and array operations for throughput. - -## Connection and credential handling - -1. Prefer secrets/secure config flow instead of embedding credentials. -2. Support wallet/TNS-based flows where applicable. -3. Keep connection string parsing strict and explicit. -4. Cache connections only with clear invalidation/reconnect behavior. - -## Data-path best practices - -1. Use array fetch/bind for bulk transfer; tune prefetch/array sizes with measured data. -2. Be explicit in LOB and long-text handling. -3. 
Handle Oracle-specific types (for example spatial/vector) via deliberate conversion policies. -4. Treat transaction behavior as an explicit contract and test it. - -## Instant Client and build hygiene - -1. Ensure both Basic and SDK components are available when compiling OCI-dependent code. -2. Keep `ORACLE_HOME`/library path handling deterministic in local and CI builds. -3. Validate platform differences (Linux/macOS/Windows) with dedicated setup scripts. -4. Document fallback behavior when OCI libraries are not found. - -## Validation checklist - -1. Connection success/failure paths are covered with actionable messages. -2. Statement execution paths cover bind, fetch, and error cases. -3. Type mappings have targeted tests for edge values. -4. Integration tests run against a real Oracle container/environment. - -## Learn more (official) - -1. Oracle Call Interface Programmer's Guide (19c): -2. Oracle Instant Client install/config docs: -3. Oracle Database Free docs: -4. Oracle SQL and datatype references: diff --git a/plugins/flow/skills/oracle/references/oem.md b/plugins/flow/skills/oracle/references/oem.md deleted file mode 100644 index 3185062..0000000 --- a/plugins/flow/skills/oracle/references/oem.md +++ /dev/null @@ -1,210 +0,0 @@ -# Oracle Enterprise Manager - -## Overview - -Use this reference when working with Oracle Enterprise Manager (OEM) Cloud Control for monitoring, performance analysis, job management, and compliance. OEM provides a web-based interface for managing the full Oracle estate — databases, middleware, hosts, and cloud infrastructure. - -## Architecture - -OEM Cloud Control consists of three main components. Understand them to troubleshoot connectivity and performance issues. - -### Oracle Management Service (OMS) - -The middle tier that processes monitoring data, serves the web console, and communicates with agents. Deploy on a dedicated host for production. Multiple OMS instances provide high availability. 
- -### Management Agent - -A lightweight process installed on every monitored host. It collects metrics, runs jobs, and sends data to OMS. Keep agents at the same version as OMS or within one major release. - -### Management Repository - -An Oracle Database that stores all monitoring data, configuration, and job history. Size it generously — undersized repositories cause OEM sluggishness. Use RAC for the repository database in production. - -## Target Discovery and Monitoring - -### Discover Targets - -```bash -# Auto-discover targets on a host (after agent install) -emcli add_target -name="prod-db01" -type="oracle_database" \ - -host="prodhost01" -credentials="set_name:DBCredsNormal" - -# Discover all targets on a host -emcli discover_targets -host="prodhost01" -``` - -### Promote Discovered Targets - -After discovery, targets appear in the OEM console under "Targets > All Targets." Promote them to enable monitoring: - -1. Navigate to **Setup > Add Target > Auto Discovery Results**. -2. Select discovered targets and click **Promote**. -3. Assign monitoring credentials and preferred credentials for each target. - -### Set Preferred Credentials - -Preferred credentials let OEM run jobs and collect metrics without prompting for passwords. Set them per-target or globally. - -1. Navigate to **Setup > Security > Preferred Credentials**. -2. Set credentials for target types: Database, Host, Listener. -3. Use named credential sets to avoid duplicating credential entries. - -## Performance Hub - -Performance Hub consolidates real-time and historical performance data in a single view. Access it from any database target: **Performance > Performance Hub**. - -### Real-Time View - -- **ASH Analytics**: Visualize active sessions by wait class, SQL ID, module, or any dimension. Drill down by clicking a time slice. -- **SQL Monitoring**: Shows currently executing SQL statements with execution plan, progress, and parallel query details. 
-- **Blocking Sessions**: Identify lock holders and waiters in real time. - -### Historical View - -- Switches to AWR-based data for past analysis. -- Correlate SQL performance changes with time periods. -- Compare SQL plan changes over time using the Plan Comparison tab. - -## SQL Monitor - -SQL Monitor tracks individual SQL execution in detail. It activates automatically for SQL running longer than 5 seconds or using parallel execution. - -### Access SQL Monitor - -1. **Performance > SQL Monitor** from any database target. -2. Search by SQL ID, username, or time range. - -### What SQL Monitor Shows - -- Execution plan with actual rows, estimated rows, and time per operation. -- I/O statistics per plan step. -- Parallel execution details: distribution method, DOP, slave activity. -- Wait events per execution plan step. - -### Force Monitoring for Short SQL - -```sql -SELECT /*+ MONITOR */ e.name, d.dept_name -FROM employees e JOIN departments d ON e.dept_id = d.id; -``` - -Use the `MONITOR` hint to track statements that would not normally qualify. Remove the hint after debugging. - -## Custom Metrics and Alerts - -### Create Custom Metrics - -Define metrics that OEM collects on a schedule and evaluates against thresholds. - -1. Navigate to **Monitoring > Metric and Collection Settings**. -2. Click **Create Custom Metric** (or use **Metric Extensions** for reusable definitions). -3. Define the SQL query that returns the metric value. - -```sql --- Example: count of unprocessed orders older than 1 hour -SELECT COUNT(*) FROM orders -WHERE status = 'PENDING' - AND created_at < SYSDATE - 1/24; -``` - -4. Set warning and critical thresholds. -5. Assign a notification rule to send alerts via email, SNMP, or PagerDuty integration. - -### Metric Extensions - -Metric extensions package custom metrics for deployment across many targets. - -1. **Enterprise > Monitoring > Metric Extensions**. -2. Create, test, deploy to target groups. -3.
Export/import metric extensions between OEM environments. - -### Alert Rules - -Configure incident rules to route alerts: - -1. **Setup > Incidents > Incident Rules**. -2. Define conditions (target type, metric, severity). -3. Assign actions: email notifications, event connectors, or auto-remediation scripts. - -## Job System - -OEM's job system schedules and executes tasks across managed targets. Use it for maintenance windows, patching, and administrative scripts. - -### Create a Job - -1. **Enterprise > Job > Create Job**. -2. Choose job type: SQL Script, OS Command, PL/SQL Block, RMAN Script. -3. Specify targets (single, group, or dynamic). -4. Set schedule: immediate, one-time, or recurring. - -### Common Job Types - -- **SQL Script**: Execute SQL or PL/SQL against database targets. -- **OS Command**: Run shell commands on host targets. -- **RMAN Script**: Backup/recovery operations. -- **Multi-Task Job**: Chain multiple steps with conditional logic. - -### Monitor Jobs - -Navigate to **Enterprise > Job > Activity** to see running, succeeded, and failed jobs. Click any job for execution details, output logs, and step-by-step status. - -### Job Best Practices - -- Use **Corrective Actions** to auto-remediate common alerts (e.g., restart a listener, clear temp space). -- Group targets into **Administration Groups** to apply jobs to dynamic sets of targets. -- Set **Blackout** periods during maintenance to suppress false alerts. - -## Compliance Frameworks - -OEM includes built-in compliance frameworks and supports custom standards. - -### Built-In Standards - -- **Oracle Database Security Configuration**: Checks for common misconfigurations (open passwords, excessive privileges, missing auditing). -- **CIS Benchmarks**: Center for Internet Security guidelines for Oracle Database. -- **STIG**: Security Technical Implementation Guide for government compliance. - -### Compliance Workflow - -1. **Enterprise > Compliance > Library**: Browse available standards. -2. 
Associate a standard with target groups. -3. OEM evaluates targets and generates a compliance score. -4. Drill into violations for remediation guidance. - -### Drift Detection - -Compare a target's configuration to a baseline or to other targets. - -1. **Enterprise > Configuration > Comparison Templates**. -2. Create a template defining which parameters to compare. -3. Compare a target against the template or against another target. -4. Generate a drift report showing differences. - -Use drift detection to verify that production, staging, and DR environments match. - -## Patch Management - -OEM automates Oracle software patching across the estate. - -### Workflow - -1. **Enterprise > Provisioning and Patching > Patches & Updates**. -2. Search for patches by number or product. -3. Download patches through OEM (requires My Oracle Support credentials). -4. Create a patching plan: select targets, patches, and schedule. -5. Run pre-checks to validate prerequisites. -6. Apply patches with automatic backup and rollback capability. - -### Best Practices - -- Patch non-production environments first and validate. -- Use the **Fleet Maintenance** feature for rolling patches across RAC clusters. -- Schedule patching during maintenance blackout windows. -- Keep OMS, agents, and repository patched to the same quarterly release. - -## Learn More (Official) - -- Oracle Enterprise Manager Documentation: -- OEM Cloud Control Administration Guide: -- OEM CLI (emcli) Reference: -- Performance Hub Documentation: diff --git a/plugins/flow/skills/oracle/references/ords.md b/plugins/flow/skills/oracle/references/ords.md deleted file mode 100644 index 331986a..0000000 --- a/plugins/flow/skills/oracle/references/ords.md +++ /dev/null @@ -1,347 +0,0 @@ -# Oracle REST Data Services (ORDS) - -## Overview - -Use this reference when exposing Oracle Database functionality over HTTP via ORDS. 
Covers architecture, AutoREST for zero-code CRUD, custom REST API definitions, OAuth2 security, and PL/SQL gateway integration. - ---- - -## Architecture - -ORDS is a Java application that maps HTTP requests to database operations. It runs in two deployment modes: - -- **Standalone (Jetty)** — simplest to set up; suitable for development, small deployments, and container-based architectures. ORDS bundles its own Jetty server. -- **Deployed (Tomcat / WebLogic)** — use for enterprise deployments where you need centralized app-server management, clustering, or existing middleware integration. - -### Request Flow - -```text -Client → HTTPS → ORDS (Jetty/Tomcat) → Connection Pool → Oracle Database - ↓ - URL routing: - /ords/{schema}/{module}/{template} -``` - -ORDS maintains a JDBC connection pool to the database. Each REST request acquires a connection, executes the mapped SQL or PL/SQL, and releases the connection. Stateless by design. - ---- - -## Module / Template / Handler Hierarchy - -ORDS organizes REST endpoints in a three-level hierarchy: - -1. **Module** — a logical grouping (like a microservice boundary). Has a base path. -2. **Template** — a URI pattern within the module. Supports path parameters (`:id`). -3. **Handler** — the HTTP method binding (GET, POST, PUT, DELETE) on a template. Contains the SQL or PL/SQL source. - -```text -Module: /api/v1/ (base path) -Template: /api/v1/orders/ (collection) -Template: /api/v1/orders/:id (single item) -Handler: GET on /api/v1/orders/ → SELECT query -Handler: POST on /api/v1/orders/ → INSERT + RETURNING -Handler: GET on /api/v1/orders/:id → SELECT WHERE order_id = :id -Handler: PUT on /api/v1/orders/:id → UPDATE WHERE order_id = :id -``` - ---- - -## AutoREST - -AutoREST generates CRUD endpoints automatically for enabled schemas and objects. Use it for rapid prototyping or when the default REST patterns are sufficient. - -### Enable a Schema - -```sql --- Run as ORDS_ADMIN or a DBA. 
--- p_enabled => TRUE turns on AutoREST for the schema. --- p_url_mapping_pattern sets the URL segment. -BEGIN - ORDS.ENABLE_SCHEMA( - p_enabled => TRUE, - p_schema => 'HR', - p_url_mapping_type => 'BASE_PATH', - p_url_mapping_pattern => 'hr', - p_auto_rest_auth => TRUE -- require authentication by default - ); - COMMIT; -END; -/ -``` - -### Enable a Table/View - -```sql --- AutoREST on a specific object generates GET (list + item), POST, PUT, DELETE. -BEGIN - ORDS.ENABLE_OBJECT( - p_enabled => TRUE, - p_schema => 'HR', - p_object => 'EMPLOYEES', - p_object_type => 'TABLE', - p_object_alias => 'employees' - ); - COMMIT; -END; -/ - --- Resulting endpoints: --- GET /ords/hr/employees/ → paginated list with ?q= filtering --- GET /ords/hr/employees/:id → single row --- POST /ords/hr/employees/ → insert --- PUT /ords/hr/employees/:id → update --- DELETE /ords/hr/employees/:id → delete -``` - -**Built-in features:** AutoREST endpoints support pagination (`?offset=`, `?limit=`), filtering (`?q={"department_id":10}`), ordering, and metadata discovery (`/metadata-catalog/`). - ---- - -## Custom REST APIs - -Use custom modules when you need business logic, joins, aggregations, or non-CRUD operations that AutoREST cannot express. - -### Define a Module, Template, and Handler - -```sql -BEGIN - -- Create the module (logical grouping). - ORDS.DEFINE_MODULE( - p_module_name => 'orders_api', - p_base_path => '/api/v1/', - p_items_per_page => 25, - p_status => 'PUBLISHED', - p_comments => 'Order management API' - ); - - -- Create a collection template. - ORDS.DEFINE_TEMPLATE( - p_module_name => 'orders_api', - p_pattern => 'orders/', - p_comments => 'Order collection' - ); - - -- Bind a GET handler that returns recent orders with customer info.
- ORDS.DEFINE_HANDLER( - p_module_name => 'orders_api', - p_pattern => 'orders/', - p_method => 'GET', - p_source_type => 'json/collection', - p_source => q'[ - SELECT o.order_id, o.order_date, o.total, - c.full_name AS customer_name - FROM orders o - JOIN customers c ON c.customer_id = o.customer_id - WHERE o.order_date >= ADD_MONTHS(SYSDATE, -3) - ORDER BY o.order_date DESC - ]' - ); - - -- Single-item template with path parameter. - ORDS.DEFINE_TEMPLATE( - p_module_name => 'orders_api', - p_pattern => 'orders/:order_id' - ); - - ORDS.DEFINE_HANDLER( - p_module_name => 'orders_api', - p_pattern => 'orders/:order_id', - p_method => 'GET', - p_source_type => 'json/item', - p_source => q'[ - SELECT o.*, c.full_name AS customer_name - FROM orders o - JOIN customers c ON c.customer_id = o.customer_id - WHERE o.order_id = :order_id - ]' - ); - - COMMIT; -END; -/ -``` - -### Handler Source Types - -| Source Type | Use When | -|-----------------------|--------------------------------------------------------| -| `json/collection` | GET returning multiple rows (paginated automatically) | -| `json/item` | GET returning a single row | -| `json/query` | GET returning raw query result (no pagination wrapper) | -| `plsql/block` | POST/PUT/DELETE executing PL/SQL | - ---- - -## OAuth2 Security - -ORDS supports OAuth2 for securing REST endpoints. Choose the flow based on your client type. - -### Client Credentials Flow (Machine-to-Machine) - -```sql --- Register an OAuth2 client. -BEGIN - OAUTH.CREATE_CLIENT( - p_name => 'batch_processor', - p_grant_type => 'client_credentials', - p_owner => 'Operations Team', - p_support_email => 'ops@example.com', - p_privilege_names => 'orders_priv' - ); - - -- Grant the client's role access to the module. 
- OAUTH.GRANT_CLIENT_ROLE( - p_client_name => 'batch_processor', - p_role_name => 'orders_role' - ); - COMMIT; -END; -/ - --- Client obtains a token: --- POST /ords/{schema}/oauth/token --- Authorization: Basic base64(client_id:client_secret) --- Body: grant_type=client_credentials -``` - -### Authorization Code Flow (User-Facing Apps) - -```sql -BEGIN - OAUTH.CREATE_CLIENT( - p_name => 'web_app', - p_grant_type => 'authorization_code', - p_owner => 'Dev Team', - p_support_email => 'dev@example.com', - p_redirect_uri => 'https://app.example.com/callback', - p_privilege_names => 'orders_priv' - ); - COMMIT; -END; -/ -``` - -### Protecting Endpoints with Privileges - -```sql --- Create a privilege that gates access. -BEGIN - ORDS.CREATE_PRIVILEGE( - p_name => 'orders_priv', - p_role_name => 'orders_role', - p_label => 'Order API Access', - p_description => 'Access to order management endpoints' - ); - - -- Map the privilege to URI patterns. - ORDS.CREATE_PRIVILEGE_MAPPING( - p_privilege_name => 'orders_priv', - p_pattern => '/api/v1/orders/*' - ); - COMMIT; -END; -/ -``` - ---- - -## PL/SQL Gateway - -The PL/SQL gateway lets you call stored procedures directly from REST endpoints. Use it when your business logic already lives in PL/SQL packages. - -### Calling a Procedure - -```sql -ORDS.DEFINE_HANDLER( - p_module_name => 'orders_api', - p_pattern => 'orders/', - p_method => 'POST', - p_source_type => 'plsql/block', - p_source => q'[ - BEGIN - order_api.place_order( - p_customer_id => :customer_id, - p_product_id => :product_id, - p_quantity => :quantity, - p_order_id => :order_id -- OUT bind - ); - END; - ]' -); -``` - -### Returning a REF CURSOR - -```sql --- Return a result set from PL/SQL via SYS_REFCURSOR. 
-ORDS.DEFINE_HANDLER( - p_module_name => 'orders_api', - p_pattern => 'orders/by-customer/:customer_id', - p_method => 'GET', - p_source_type => 'plsql/block', - p_source => q'[ - BEGIN - order_api.get_orders( - p_customer_id => :customer_id, - p_cursor => :result_set -- ORDS binds this as the response body - ); - END; - ]' -); -``` - -### BLOB Streaming - -```sql --- Stream binary content (PDF, image) directly from a BLOB column. --- Set Content-Type via the :content_type bind. -ORDS.DEFINE_HANDLER( - p_module_name => 'docs_api', - p_pattern => 'documents/:doc_id', - p_method => 'GET', - p_source_type => 'plsql/block', - p_source => q'[ - BEGIN - SELECT mime_type, content - INTO :content_type, :blob_content - FROM documents - WHERE doc_id = :doc_id; - END; - ]' -); -``` - ---- - -## Configuration and Deployment - -### Standalone Quick Start - -```bash -# Install ORDS and configure a database connection. -ords install --interactive - -# Start the standalone server. -ords serve --port 8443 --secure - -# Default URL: https://localhost:8443/ords/ -``` - -### Key Configuration Properties - -| Property | Purpose | -|---------------------------------------|----------------------------------------------| -| `db.connectionType` | `basic`, `tns`, `customurl` | -| `jdbc.MaxLimit` | Max connections in ORDS pool | -| `jdbc.InitialLimit` | Connections created at startup | -| `security.requestValidationFunction` | PL/SQL function for custom auth validation | -| `misc.pagination.maxRows` | Hard limit on rows per page (default 10000) | - ---- - -## Official References - -- ORDS Documentation: -- ORDS Developer's Guide: -- ORDS PL/SQL API: -- ORDS Installation Guide: diff --git a/plugins/flow/skills/oracle/references/performance.md b/plugins/flow/skills/oracle/references/performance.md deleted file mode 100644 index ba8d091..0000000 --- a/plugins/flow/skills/oracle/references/performance.md +++ /dev/null @@ -1,281 +0,0 @@ -# Performance Tuning - -## Overview - -Use this reference 
when diagnosing slow queries, choosing index strategies, interpreting execution plans, or tuning Oracle memory and workload characteristics. Performance tuning is iterative: measure first, change one thing, measure again. - -## Reading Execution Plans - -### Generate Plans - -Use `EXPLAIN PLAN` for estimated plans and `DBMS_XPLAN.DISPLAY_CURSOR` for actual runtime statistics. Prefer actual stats because the optimizer's row estimates can be wildly wrong. - -```sql --- Estimated plan -EXPLAIN PLAN FOR -SELECT /*+ GATHER_PLAN_STATISTICS */ e.name, d.dept_name -FROM employees e JOIN departments d ON e.dept_id = d.id -WHERE e.salary > 100000; - -SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY(format => 'TYPICAL')); - --- Actual plan with runtime stats (run the query first) -SELECT /*+ GATHER_PLAN_STATISTICS */ e.name, d.dept_name -FROM employees e JOIN departments d ON e.dept_id = d.id -WHERE e.salary > 100000; - -SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(format => 'ALLSTATS LAST')); -``` - -### Interpret the Plan - -- **A-Rows vs E-Rows**: Compare actual rows returned to estimated rows. A ratio beyond 10x signals stale or missing statistics. -- **Starts**: How many times an operation executed. High starts in a nested loop means the inner table is hit repeatedly — consider a hash join. -- **Cost**: Relative measure within a single plan only. Never compare cost numbers across different queries. -- **Predicate Information**: Look for `access` (index-driven) vs `filter` (post-fetch discard). Filters on high-volume steps waste I/O. -- **TABLE ACCESS FULL**: Not always bad. For analytical queries touching most rows, a full scan beats an index lookup. - -## Index Strategy - -Choose the index type based on the data and query pattern, not by default. - -### B-tree Indexes - -Default workhorse. Use for high-cardinality columns in equality and range predicates. 
- -```sql -CREATE INDEX idx_emp_salary ON employees(salary); -``` - -### Bitmap Indexes - -Use for low-cardinality columns (status, gender, region) in data warehouse workloads. Never use in OLTP — bitmap indexes cause severe contention under concurrent DML. - -```sql -CREATE BITMAP INDEX idx_order_status ON orders(status); -``` - -### Function-Based Indexes - -When queries apply functions to columns, a standard index is useless. Index the expression instead. - -```sql -CREATE INDEX idx_emp_upper_name ON employees(UPPER(last_name)); - --- This query now uses the index -SELECT * FROM employees WHERE UPPER(last_name) = 'SMITH'; -``` - -### Composite Indexes - -Column order matters. Place equality columns first, range columns last. The index is useful for queries that filter on a leading prefix. - -```sql --- Supports: WHERE dept_id = 10, WHERE dept_id = 10 AND salary > 50000 --- Does NOT support: WHERE salary > 50000 (alone) -CREATE INDEX idx_emp_dept_sal ON employees(dept_id, salary); -``` - -### Invisible Indexes - -Test a new index on production without affecting existing plans. The optimizer ignores invisible indexes unless a session explicitly enables them. - -```sql -CREATE INDEX idx_emp_hire ON employees(hire_date) INVISIBLE; - --- Test in your session only -ALTER SESSION SET OPTIMIZER_USE_INVISIBLE_INDEXES = TRUE; -``` - -## Gathering Statistics with DBMS_STATS - -Statistics drive optimizer decisions. Stale or missing stats are the single most common cause of bad plans. - -### Basic Gathering - -```sql --- Gather for a single table -EXEC DBMS_STATS.GATHER_TABLE_STATS('HR', 'EMPLOYEES', CASCADE => TRUE); - --- Gather for entire schema -EXEC DBMS_STATS.GATHER_SCHEMA_STATS('HR'); - --- Gather stale stats only (efficient for scheduled jobs) -EXEC DBMS_STATS.GATHER_SCHEMA_STATS('HR', OPTIONS => 'GATHER STALE'); -``` - -### Histograms - -Histograms help the optimizer understand skewed data distributions. 
Without them, Oracle assumes uniform distribution, which produces bad cardinality estimates for skewed columns. - -```sql --- Force histogram on a specific column -EXEC DBMS_STATS.GATHER_TABLE_STATS('HR', 'EMPLOYEES', - METHOD_OPT => 'FOR COLUMNS SIZE 254 department_id'); -``` - -### Extended Statistics - -Capture column group correlations that the optimizer cannot infer on its own. - -```sql --- Tell the optimizer that country and state are correlated -SELECT DBMS_STATS.CREATE_EXTENDED_STATS('HR', 'CUSTOMERS', '(country, state)') -FROM DUAL; - -EXEC DBMS_STATS.GATHER_TABLE_STATS('HR', 'CUSTOMERS'); -``` - -### Pending Stats - -Test stats before publishing them to production workloads. - -```sql -EXEC DBMS_STATS.SET_TABLE_PREFS('HR', 'EMPLOYEES', 'PUBLISH', 'FALSE'); -EXEC DBMS_STATS.GATHER_TABLE_STATS('HR', 'EMPLOYEES'); - --- Verify the pending stats with your problem query -ALTER SESSION SET OPTIMIZER_USE_PENDING_STATISTICS = TRUE; - --- If the plan improves, publish -EXEC DBMS_STATS.PUBLISH_PENDING_STATS('HR', 'EMPLOYEES'); -``` - -### Detect Stale Stats - -```sql -SELECT table_name, last_analyzed, stale_stats -FROM ALL_TAB_STATISTICS -WHERE owner = 'HR' AND stale_stats = 'YES'; -``` - -## AWR Reports - -AWR (Automatic Workload Repository) captures periodic performance snapshots. Use AWR reports for after-the-fact analysis of performance degradation. - -### Generate a Report - -```sql --- List available snapshots -SELECT snap_id, begin_interval_time FROM DBA_HIST_SNAPSHOT -ORDER BY snap_id DESC FETCH FIRST 20 ROWS ONLY; - --- Generate HTML report between two snapshots -@$ORACLE_HOME/rdbms/admin/awrrpt.sql - --- Programmatic generation -EXEC DBMS_WORKLOAD_REPOSITORY.CREATE_SNAPSHOT; -- force a snapshot now -``` - -### Key Sections to Read - -- **Top SQL by Elapsed Time**: Identifies the heaviest queries. Start tuning here. -- **Top Wait Events**: Reveals what the database spends time waiting for (I/O, locks, latches). 
-- **Instance Efficiency**: Buffer cache hit ratio, parse ratios. Low values signal configuration issues. -- **Segment Statistics**: Identifies hot tables and indexes causing the most physical reads. - -## Active Session History (ASH) - -ASH samples active sessions every second. Use it for real-time and recent-past diagnosis without the overhead of tracing. - -```sql --- What is happening right now? -SELECT sql_id, event, session_state, COUNT(*) -FROM V$ACTIVE_SESSION_HISTORY -WHERE sample_time > SYSDATE - INTERVAL '5' MINUTE -GROUP BY sql_id, event, session_state -ORDER BY COUNT(*) DESC; - --- Who is blocking whom? -SELECT blocking_session, session_id, sql_id, event, wait_time -FROM V$ACTIVE_SESSION_HISTORY -WHERE blocking_session IS NOT NULL - AND sample_time > SYSDATE - INTERVAL '10' MINUTE; - --- Historical ASH (from AWR, for analysis beyond the ASH buffer) -SELECT sql_id, event, COUNT(*) -FROM DBA_HIST_ACTIVE_SESS_HISTORY -WHERE sample_time BETWEEN TIMESTAMP '2026-03-25 14:00:00' - AND TIMESTAMP '2026-03-25 15:00:00' -GROUP BY sql_id, event -ORDER BY COUNT(*) DESC; -``` - -## Common Wait Events and Remediation - -| Wait Event | Meaning | Action | -|---|---|---| -| `db file sequential read` | Single-block I/O (index lookup) | Check for excessive index access; verify statistics | -| `db file scattered read` | Multi-block I/O (full table scan) | Expected for large scans; reduce if scan is unintended | -| `log file sync` | Commit waiting for redo write | Reduce commit frequency; check redo log I/O performance | -| `enq: TX - row lock contention` | Row-level lock conflict | Investigate application logic; reduce transaction duration | -| `latch: shared pool` | Parsing contention | Use bind variables; increase shared pool if undersized | -| `direct path read` | Parallel or serial direct read | Generally normal for large operations | -| `buffer busy waits` | Contention on a hot block | Reduce index contention; consider hash partitioning | - -## SGA/PGA Memory Tuning - 
-### SGA - -- **Buffer Cache** (`DB_CACHE_SIZE`): Caches data blocks. Size it so the buffer cache hit ratio stays above 95% for OLTP. -- **Shared Pool** (`SHARED_POOL_SIZE`): Stores parsed SQL and PL/SQL. Hard-parsing (not using bind variables) thrashes this. -- **Large Pool** (`LARGE_POOL_SIZE`): Used by RMAN, parallel execution, shared servers. Size separately from the shared pool. -- Use `SGA_TARGET` for automatic SGA management. Set `SGA_TARGET` and let Oracle distribute among components. - -### PGA - -- **PGA Aggregate Target** (`PGA_AGGREGATE_TARGET`): Controls memory for sorts, hash joins, bitmap merges. -- Undersized PGA forces operations to disk (`TEMP` tablespace), which is orders of magnitude slower. -- Monitor with `V$PGA_TARGET_ADVICE` to find the optimal setting. - -```sql --- Check PGA advice -SELECT pga_target_for_estimate/1024/1024 AS pga_mb, - estd_extra_bytes_rw, estd_pga_cache_hit_percentage -FROM V$PGA_TARGET_ADVICE; -``` - -## Optimizer Hints - -Use hints sparingly and as a last resort. Hints override the optimizer, which means they do not adapt when data changes. A hint that helps today can hurt tomorrow. - -### When Hints Are Justified - -- Emergency production fix while you gather proper stats or file a bug. -- Forcing parallelism for a known-heavy batch job. -- Working around a confirmed optimizer bug with a specific plan shape. 
- -### Common Hints - -```sql --- Force join order: process employees first, then departments -SELECT /*+ LEADING(e d) */ e.name, d.dept_name -FROM employees e JOIN departments d ON e.dept_id = d.id; - --- Force nested loop join (good when driving table is small) -SELECT /*+ USE_NL(d) */ e.name, d.dept_name -FROM employees e JOIN departments d ON e.dept_id = d.id; - --- Force full table scan (skip index when you know you need most rows) -SELECT /*+ FULL(e) */ * FROM employees e WHERE salary > 10000; - --- Force a specific index -SELECT /*+ INDEX(e idx_emp_salary) */ * FROM employees e WHERE salary > 10000; - --- Parallel execution -SELECT /*+ PARALLEL(e, 4) */ * FROM employees e; -``` - -### Why Not to Rely on Hints - -- They embed physical assumptions (index names, table sizes) into SQL text. -- They prevent the optimizer from adapting to data growth. -- They create maintenance burden: if you rename an index, the hint silently stops working. -- Prefer fixing the root cause: gather stats, add the right index, restructure the query. - -## Learn More (Official) - -- Oracle Database Performance Tuning Guide: -- DBMS_XPLAN Reference: -- DBMS_STATS Reference: -- AWR/ADDM Documentation: diff --git a/plugins/flow/skills/oracle/references/plsql.md b/plugins/flow/skills/oracle/references/plsql.md deleted file mode 100644 index eb98c88..0000000 --- a/plugins/flow/skills/oracle/references/plsql.md +++ /dev/null @@ -1,296 +0,0 @@ -# PL/SQL Development - -## Overview - -Use this reference when writing or reviewing PL/SQL: package design, error handling, bulk operations, performance tuning, and common architectural patterns. Every recommendation explains the underlying reason so you can adapt it to your context. - ---- - -## Package Spec / Body Architecture - -Packages are the primary unit of PL/SQL API design. The spec declares the public contract; the body hides implementation. 
- -```sql -CREATE OR REPLACE PACKAGE order_api AS - -- Public types first, then constants, then subprograms. - SUBTYPE order_id_t IS orders.order_id%TYPE; - - gc_max_line_items CONSTANT PLS_INTEGER := 500; - - PROCEDURE place_order( - p_customer_id IN customers.customer_id%TYPE, - p_items IN order_item_tab_t, - p_order_id OUT order_id_t - ); - - FUNCTION get_total(p_order_id IN order_id_t) RETURN NUMBER; -END order_api; -/ -``` - -**Design rules:** - -1. Anchor parameter types to table columns with `%TYPE` / `%ROWTYPE` so DDL changes propagate automatically. -2. Keep the spec minimal — expose only what callers need. Move helper logic to body-private subprograms. -3. Use subtypes (`SUBTYPE order_id_t IS ...`) to give semantic names to raw types. -4. Initialize package state in the body initialization block, not in variable declarations, so you can handle exceptions. - ---- - -## Exception Handling - -### Exception Hierarchy - -```sql --- Predefined exceptions: NO_DATA_FOUND, TOO_MANY_ROWS, DUP_VAL_ON_INDEX, etc. --- These are already declared in the STANDARD package. - --- User-defined exceptions: declare in the package spec when callers need to catch them. -e_order_locked EXCEPTION; -PRAGMA EXCEPTION_INIT(e_order_locked, -54); -- bind to ORA-00054 (resource busy) - --- RAISE_APPLICATION_ERROR for custom error codes visible to SQL callers. --- Use the range -20000 to -20999. -RAISE_APPLICATION_ERROR(-20100, 'Order ' || p_order_id || ' exceeds credit limit'); -``` - -### Diagnostic Stack Capture - -```sql --- DBMS_UTILITY.FORMAT_ERROR_BACKTRACE returns the line-number stack trace. --- FORMAT_ERROR_STACK returns the error message chain. --- Always log both — the backtrace tells you WHERE; the stack tells you WHAT. 
-EXCEPTION - WHEN OTHERS THEN - log_pkg.error( - p_message => SQLERRM, - p_backtrace => DBMS_UTILITY.FORMAT_ERROR_BACKTRACE, - p_stack => DBMS_UTILITY.FORMAT_ERROR_STACK - ); - RAISE; -- re-raise after logging; never silently swallow exceptions -END; -``` - -**Why re-raise:** swallowing exceptions hides bugs. Log-and-raise preserves the diagnostic trail while letting the caller decide the recovery strategy. - ---- - -## BULK COLLECT / FORALL - -Context switches between the SQL engine and the PL/SQL engine are expensive. BULK COLLECT and FORALL minimize them by processing arrays instead of individual rows. - -### BULK COLLECT with LIMIT - -```sql --- Always use LIMIT to cap memory consumption. --- Without LIMIT, a 10M-row table loads entirely into PGA. -DECLARE - TYPE order_tab_t IS TABLE OF orders%ROWTYPE; - l_orders order_tab_t; - CURSOR c_pending IS - SELECT * FROM orders WHERE status = 'PENDING'; -BEGIN - OPEN c_pending; - LOOP - FETCH c_pending BULK COLLECT INTO l_orders LIMIT 1000; - EXIT WHEN l_orders.COUNT = 0; - - FORALL i IN 1 .. l_orders.COUNT - UPDATE orders - SET status = 'PROCESSING', updated_at = SYSDATE - WHERE order_id = l_orders(i).order_id; - - COMMIT; -- commit per batch to avoid undo pressure - END LOOP; - CLOSE c_pending; -END; -/ -``` - -### SAVE EXCEPTIONS - -```sql --- SAVE EXCEPTIONS tells FORALL to continue past individual row failures. --- Inspect SQL%BULK_EXCEPTIONS after the block to handle failures. -BEGIN - FORALL i IN 1 .. l_items.COUNT SAVE EXCEPTIONS - INSERT INTO order_items VALUES l_items(i); -EXCEPTION - WHEN OTHERS THEN - IF SQLCODE = -24381 THEN -- ORA-24381: error(s) in array DML - FOR j IN 1 .. SQL%BULK_EXCEPTIONS.COUNT LOOP - log_pkg.warn('Row ' || SQL%BULK_EXCEPTIONS(j).ERROR_INDEX - || ' failed: ORA-' || SQL%BULK_EXCEPTIONS(j).ERROR_CODE); - END LOOP; - ELSE - RAISE; - END IF; -END; -/ -``` - ---- - -## Context Switch Minimization - -Every SQL statement inside a PL/SQL loop incurs a context switch. 
Strategies to reduce this: - -1. **BULK COLLECT + FORALL** — process arrays, not scalars. -2. **PRAGMA UDF** — hint to the SQL engine that a PL/SQL function is designed for SQL use; reduces per-row switch overhead (12c+). -3. **RESULT_CACHE** — cache deterministic function results in SGA; subsequent calls skip execution entirely. -4. **Pure SQL** — rewrite row-by-row PL/SQL as a single MERGE, analytic query, or MODEL clause when possible. - ---- - -## Pipelined Functions - -Pipelined functions return rows incrementally as they are produced, reducing memory and enabling producer-consumer parallelism. - -```sql -CREATE OR REPLACE FUNCTION get_large_dataset(p_dept_id NUMBER) - RETURN emp_tab_t PIPELINED -AS -BEGIN - FOR rec IN (SELECT * FROM employees WHERE department_id = p_dept_id) LOOP - PIPE ROW(emp_obj_t(rec.employee_id, rec.full_name, rec.salary)); - END LOOP; - RETURN; -END; -/ - --- Consume it like a table. -SELECT * FROM TABLE(get_large_dataset(50)) WHERE salary > 80000; -``` - -**When to use pipelined functions:** ETL transformations, row generation, or any case where materializing the full result set before returning is impractical. - ---- - -## RESULT_CACHE and PRAGMA UDF - -```sql --- RESULT_CACHE stores the function's return value keyed by input parameters. --- Oracle automatically invalidates the cache when underlying tables change. -CREATE OR REPLACE FUNCTION get_tax_rate(p_region_code VARCHAR2) - RETURN NUMBER RESULT_CACHE RELIES_ON (tax_rates) -IS - v_rate NUMBER; -BEGIN - SELECT rate INTO v_rate FROM tax_rates WHERE region_code = p_region_code; - RETURN v_rate; -END; -/ - --- PRAGMA UDF reduces context-switch overhead when calling from SQL. -CREATE OR REPLACE FUNCTION format_phone(p_raw VARCHAR2) RETURN VARCHAR2 IS - PRAGMA UDF; -BEGIN - RETURN '(' || SUBSTR(p_raw,1,3) || ') ' || SUBSTR(p_raw,4,3) || '-' || SUBSTR(p_raw,7); -END; -/ -``` - ---- - -## Collections - -| Type | Indexed By | Sparse? | Usable in SQL? 
| Best For | -|--------------------|----------------|---------|----------------|--------------------------------------------| -| Associative array | PLS_INTEGER or VARCHAR2 | Yes | No | PL/SQL lookup tables, caches | -| Nested table | INTEGER (1..N) | After DELETE | Yes | BULK COLLECT targets, SQL TABLE() operator | -| VARRAY | INTEGER (1..N) | No | Yes | Fixed-size ordered lists (e.g., top-N) | - -**Rule of thumb:** use associative arrays for PL/SQL-only work, nested tables when you need SQL interop or BULK COLLECT, varrays when the maximum cardinality is known and small. - ---- - -## Cursor Patterns - -### Implicit Cursor (Cursor FOR Loop) - -```sql --- Simplest pattern. Oracle manages open/fetch/close. --- Use when processing every row and you don't need BULK COLLECT. -FOR rec IN (SELECT employee_id, salary FROM employees WHERE department_id = 10) LOOP - process_employee(rec.employee_id, rec.salary); -END LOOP; -``` - -### Explicit Cursor - -```sql --- Use when you need BULK COLLECT with LIMIT or must re-open with different binds. -CURSOR c_emps (p_dept_id NUMBER) IS - SELECT employee_id, salary FROM employees WHERE department_id = p_dept_id; -``` - -### REF CURSOR / SYS_REFCURSOR - -```sql --- Use SYS_REFCURSOR to return result sets to callers (JDBC, ORDS, other PL/SQL). -PROCEDURE get_orders( - p_customer_id IN NUMBER, - p_cursor OUT SYS_REFCURSOR -) IS -BEGIN - OPEN p_cursor FOR - SELECT order_id, order_date, total - FROM orders - WHERE customer_id = p_customer_id - ORDER BY order_date DESC; -END; -``` - ---- - -## Autonomous Transactions (Logging Pattern) - -```sql --- Autonomous transactions commit independently of the calling transaction. --- Use them for logging/auditing so that log entries persist even if the --- caller rolls back. 
-CREATE OR REPLACE PROCEDURE log_event( - p_severity VARCHAR2, p_message VARCHAR2 -) IS - PRAGMA AUTONOMOUS_TRANSACTION; -BEGIN - INSERT INTO app_log (log_time, severity, message) - VALUES (SYSTIMESTAMP, p_severity, p_message); - COMMIT; -END; -/ -``` - -**Warning:** never use autonomous transactions for business logic DML. They bypass the caller's transaction boundary, which creates consistency hazards. - ---- - -## Table API (TAPI) Pattern - -A TAPI wraps each table with a package that provides insert/update/delete/get procedures. This centralizes DML, enforces business rules, and makes bulk operations consistent. - -```sql -CREATE OR REPLACE PACKAGE customers_tapi AS - PROCEDURE ins(p_row IN customers%ROWTYPE); - PROCEDURE upd(p_row IN customers%ROWTYPE); - PROCEDURE del(p_customer_id IN customers.customer_id%TYPE); - FUNCTION get_by_id(p_customer_id IN customers.customer_id%TYPE) RETURN customers%ROWTYPE; - PROCEDURE bulk_ins(p_rows IN customer_tab_t); -END customers_tapi; -/ -``` - -**Why TAPI:** - -1. Single place to add auditing, validation, or change-capture triggers. -2. BULK operations are centralized — callers don't reinvent FORALL logic. -3. Testing is straightforward: mock the TAPI, not individual SQL statements. -4. Schema changes propagate through `%ROWTYPE` anchoring. - ---- - -## Official References - -- PL/SQL Language Reference: -- PL/SQL Packages and Types Reference: -- Database Development Guide: diff --git a/plugins/flow/skills/oracle/references/schema_migrations.md b/plugins/flow/skills/oracle/references/schema_migrations.md deleted file mode 100644 index 40f45bd..0000000 --- a/plugins/flow/skills/oracle/references/schema_migrations.md +++ /dev/null @@ -1,519 +0,0 @@ -# Schema Migration & DevOps - -## Overview - -Use this reference when managing Oracle schema changes through version-controlled migrations, performing zero-downtime deployments, running automated tests, or operating multitenant PDB environments. 
Treat schema changes with the same rigor as application code: version them, test them, review them, automate them. - -## Liquibase with Oracle - -Liquibase tracks schema changes via changelogs. Each changeset is an atomic, idempotent migration unit identified by author and ID. - -### Changelog Structure - -```xml - - - - - - - - - - - - - - - - - - - - - - - - -``` - -### Oracle-Specific Changeset Patterns - -Use `sql` and `sqlFile` changesets for Oracle features that Liquibase's cross-platform XML does not cover. - -```xml - - - - - DROP PACKAGE customer_pkg - - - - - - - CREATE INDEX idx_cust_email ON customers(UPPER(email)) - - - DROP INDEX idx_cust_email - - -``` - -### Wallet Connections - -Avoid passwords in Liquibase properties files. Use Oracle Wallet for secure, credential-free connections. - -```properties -# liquibase.properties -url=jdbc:oracle:thin:@mydb_tns_alias -driver=oracle.jdbc.OracleDriver -# No username/password — wallet handles authentication -``` - -Set `TNS_ADMIN` to point to the wallet location before running Liquibase. - -### SQLcl Liquibase Integration - -SQLcl includes Liquibase natively. Use it to avoid managing a separate Liquibase installation. - -```sql --- Generate changelog from existing schema -lb generate-schema -split - --- Apply pending changes -lb update -changelog-file controller.xml - --- Rollback last 2 changesets -lb rollback -count 2 - --- Diff two schemas -lb diff -reference-url jdbc:oracle:thin:@other_db -``` - -## Flyway with Oracle - -Flyway uses numbered SQL scripts for migrations. It is simpler than Liquibase but less flexible for rollbacks. - -### Naming Conventions - -```text -sql/ -├── V1__create_customers.sql # versioned migration -├── V2__add_status_column.sql # versioned migration -├── V3__create_orders_table.sql # versioned migration -├── R__customer_view.sql # repeatable (re-run on change) -└── afterMigrate__grant_permissions.sql # callback -``` - -- **V** prefix: Versioned migrations run once, in order. 
Never edit after applying. -- **R** prefix: Repeatable migrations re-run whenever the checksum changes. Use for views, packages, and grants. -- **Callbacks**: `beforeMigrate`, `afterMigrate`, etc. Use `afterMigrate` for grants and synonym creation. - -### Oracle-Specific Flyway Configuration - -```properties -# flyway.conf -flyway.url=jdbc:oracle:thin:@//localhost:1521/FREEPDB1 -flyway.user=hr -flyway.schemas=HR -flyway.defaultSchema=HR -flyway.oracleSqlplus=true # enable SQL*Plus commands in scripts -flyway.oracleSqlplusWarn=true # warn on unsupported SQL*Plus commands -flyway.placeholders.tablespace=APP_DATA -``` - -### PL/SQL in Flyway Scripts - -Flyway needs an explicit delimiter for PL/SQL blocks. - -```sql --- V4__create_audit_trigger.sql -CREATE OR REPLACE TRIGGER customers_audit_trg - AFTER INSERT OR UPDATE OR DELETE ON customers - FOR EACH ROW -BEGIN - INSERT INTO audit_log(table_name, action, changed_at) - VALUES ('CUSTOMERS', - CASE WHEN INSERTING THEN 'INSERT' WHEN UPDATING THEN 'UPDATE' ELSE 'DELETE' END, - SYSTIMESTAMP); -END; -/ -``` - -## DBMS_REDEFINITION: Online Table Restructuring - -Restructure a table (add columns, change partitioning, move tablespace) while the table remains available for DML. No downtime. - -### Workflow - -```sql --- 1. Verify the table can be redefined -EXEC DBMS_REDEFINITION.CAN_REDEF_TABLE('HR', 'EMPLOYEES'); - --- 2. Create the interim table with the desired new structure -CREATE TABLE hr.employees_interim ( - id NUMBER(19) NOT NULL, - name VARCHAR2(200), - email VARCHAR2(255), - salary NUMBER(10,2), - dept_id NUMBER(10), - created_at TIMESTAMP DEFAULT SYSTIMESTAMP -) PARTITION BY RANGE (created_at) ( - PARTITION p2025 VALUES LESS THAN (TIMESTAMP '2026-01-01 00:00:00'), - PARTITION p2026 VALUES LESS THAN (TIMESTAMP '2027-01-01 00:00:00'), - PARTITION pmax VALUES LESS THAN (MAXVALUE) -); - --- 3. 
Start redefinition (Oracle copies data and tracks changes) -BEGIN - DBMS_REDEFINITION.START_REDEF_TABLE( - uname => 'HR', - orig_table => 'EMPLOYEES', - int_table => 'EMPLOYEES_INTERIM', - col_mapping => 'ID id, NAME name, EMAIL email, SALARY salary, DEPT_ID dept_id, CREATED_AT created_at' - ); -END; -/ - --- 4. Copy dependent objects (indexes, triggers, grants, constraints) -DECLARE - n_errors PLS_INTEGER; -BEGIN - DBMS_REDEFINITION.COPY_TABLE_DEPENDENTS( - uname => 'HR', - orig_table => 'EMPLOYEES', - int_table => 'EMPLOYEES_INTERIM', - num_errors => n_errors - ); - IF n_errors > 0 THEN - -- Check DBA_REDEFINITION_ERRORS for details - RAISE_APPLICATION_ERROR(-20001, n_errors || ' errors copying dependents'); - END IF; -END; -/ - --- 5. Sync interim table with changes made during redefinition -EXEC DBMS_REDEFINITION.SYNC_INTERIM_TABLE('HR', 'EMPLOYEES', 'EMPLOYEES_INTERIM'); - --- 6. Finish (atomic swap — brief lock) -EXEC DBMS_REDEFINITION.FINISH_REDEF_TABLE('HR', 'EMPLOYEES', 'EMPLOYEES_INTERIM'); - --- 7. Clean up the old table (now named EMPLOYEES_INTERIM after swap) -DROP TABLE hr.employees_interim PURGE; -``` - -## Edition-Based Redefinition (EBR) - -EBR enables zero-downtime application upgrades by letting old and new code coexist. Users on the old edition see the old schema; users on the new edition see the new schema. - -### Core Concepts - -- **Edition**: A named version of the database's PL/SQL code and editioning views. -- **Editioning View**: A view on a base table that controls which columns each edition sees. -- **Crossedition Trigger**: Synchronizes data between editions during the transition period. - -### Setup - -```sql --- Enable editions for a schema -ALTER USER hr ENABLE EDITIONS; - --- Create a new edition -CREATE EDITION v2 AS CHILD OF ora$base; - --- Switch to the new edition -ALTER SESSION SET EDITION = v2; -``` - -### Deployment Workflow - -1. **Create new edition** as a child of the current edition. -2. 
**Modify editioning views** in the new edition to expose new columns. -3. **Create crossedition triggers** to sync data between old and new column layouts. -4. **Deploy new PL/SQL** in the new edition. -5. **Switch application connections** to the new edition. -6. **Drop the old edition** after all sessions have migrated. - -```sql --- Editioning view in new edition -CREATE OR REPLACE EDITIONING VIEW hr.employees_ev AS -SELECT id, first_name, last_name, full_name, email, salary, dept_id -FROM hr.employees_base; - --- Forward crossedition trigger: populate new columns from old -CREATE OR REPLACE TRIGGER hr.employees_fwd_xed - BEFORE INSERT OR UPDATE ON hr.employees_base - FOR EACH ROW - FORWARD CROSSEDITION -BEGIN - :NEW.full_name := :NEW.first_name || ' ' || :NEW.last_name; -END; -/ -``` - -### When to Use EBR - -- Large-scale deployments where you cannot afford downtime. -- Gradual rollouts where old and new application versions run simultaneously. -- EBR adds complexity. For simple deployments, online DDL or DBMS_REDEFINITION may be sufficient. - -## utPLSQL Testing Framework - -utPLSQL is the standard unit testing framework for PL/SQL. Treat PL/SQL tests like application tests: run them in CI on every commit. 
- -### Test Package Structure - -```sql -CREATE OR REPLACE PACKAGE test_customer_pkg AS - -- %suite(Customer Package Tests) - -- %suitepath(hr.customers) - - -- %test(Creates a new customer and returns a valid ID) - PROCEDURE test_create_customer; - - -- %test(Rejects duplicate email addresses) - -- %throws(-1, -20001) - PROCEDURE test_duplicate_email; - - -- %beforeall - PROCEDURE setup_test_data; - - -- %afterall - PROCEDURE teardown_test_data; -END; -/ - -CREATE OR REPLACE PACKAGE BODY test_customer_pkg AS - - PROCEDURE setup_test_data IS - BEGIN - INSERT INTO customers(id, email, status) - VALUES (test_seq.NEXTVAL, 'setup@example.com', 'ACTIVE'); - COMMIT; - END; - - PROCEDURE test_create_customer IS - v_id customers.id%TYPE; - BEGIN - v_id := customer_pkg.create_customer('new@example.com', 'New Customer'); - ut.expect(v_id).to_be_greater_than(0); - - -- Verify the record exists - ut.expect( - SCALAR('SELECT COUNT(*) FROM customers WHERE id = ' || v_id) - ).to_equal(1); - END; - - PROCEDURE test_duplicate_email IS - v_id customers.id%TYPE; - BEGIN - v_id := customer_pkg.create_customer('setup@example.com', 'Duplicate'); - -- Should never reach here — %throws expects an exception - END; - - PROCEDURE teardown_test_data IS - BEGIN - DELETE FROM customers WHERE email LIKE '%@example.com'; - COMMIT; - END; - -END; -/ -``` - -### Run Tests - -```sql --- Run all tests -EXEC ut.run(); - --- Run a specific test suite -EXEC ut.run('test_customer_pkg'); - --- Run with specific reporter -EXEC ut.run(a_reporter => ut_junit_reporter()); - --- Run from command line (for CI) --- Uses utPLSQL-cli -utplsql run hr/password@localhost:1521/FREEPDB1 \ - -f=ut_junit_reporter -o=test-results.xml \ - -f=ut_coverage_html_reporter -o=coverage.html -``` - -### CI/CD Integration - -```bash -# In your CI pipeline -utplsql run "${DB_USER}/${DB_PASS}@${DB_HOST}:${DB_PORT}/${DB_SERVICE}" \ - -f=ut_junit_reporter -o=test-results.xml \ - -f=ut_sonar_test_reporter -o=sonar-report.xml \ - 
-f=ut_coverage_sonar_reporter -o=coverage.xml \ - --coverage-schemes="${DB_USER}" -``` - -Parse test-results.xml with your CI tool's JUnit reporter. Upload coverage to SonarQube for visibility. - -### Assertions - -```sql --- Equality -ut.expect(v_actual).to_equal(v_expected); - --- Null checks -ut.expect(v_value).to_be_null(); -ut.expect(v_value).not_to_be_null(); - --- Comparison -ut.expect(v_count).to_be_greater_than(0); -ut.expect(v_count).to_be_between(1, 100); - --- String matching -ut.expect(v_name).to_be_like('Smith%'); -ut.expect(v_email).to_match('^[a-z]+@'); - --- Cursor comparison (compare result sets) -ut.expect(SYS_REFCURSOR).to_equal(SYS_REFCURSOR); -``` - -## Online DDL Operations - -Oracle supports online operations that avoid blocking DML. Use them for production changes. - -### Online Index Operations - -```sql --- Create index without blocking DML -CREATE INDEX idx_orders_date ON orders(order_date) ONLINE; - --- Rebuild index online -ALTER INDEX idx_orders_date REBUILD ONLINE; - --- Drop index (no ONLINE keyword needed — drops are instant metadata ops) -DROP INDEX idx_orders_date; -``` - -### Online Table Move - -Move a table to a new tablespace or compress it without downtime. Available in 12.2+. 
- -```sql --- Move table online (indexes automatically maintained in 12.2+) -ALTER TABLE orders MOVE ONLINE; - --- Move to a different tablespace -ALTER TABLE orders MOVE TABLESPACE archive_data ONLINE; - --- Move with compression -ALTER TABLE orders MOVE TABLESPACE archive_data - ROW STORE COMPRESS ADVANCED ONLINE; -``` - -### Online Partition Operations - -```sql --- Split partition online -ALTER TABLE orders SPLIT PARTITION p2026 - AT (TIMESTAMP '2026-07-01 00:00:00') - INTO (PARTITION p2026h1, PARTITION p2026h2) ONLINE; - --- Merge partitions online -ALTER TABLE orders MERGE PARTITIONS p2024q1, p2024q2 - INTO PARTITION p2024h1 ONLINE; -``` - -## CDB/PDB Multitenant Architecture - -Oracle multitenant lets a single Container Database (CDB) host multiple Pluggable Databases (PDBs). Each PDB is an isolated database from the application perspective. - -### Create a PDB - -```sql --- Create PDB from seed -CREATE PLUGGABLE DATABASE sales_pdb - ADMIN USER pdb_admin IDENTIFIED BY "PdbPass123!" - FILE_NAME_CONVERT = ('/pdbseed/', '/sales_pdb/') - DEFAULT TABLESPACE sales_data - DATAFILE '/opt/oracle/oradata/sales_data01.dbf' SIZE 1G AUTOEXTEND ON; - -ALTER PLUGGABLE DATABASE sales_pdb OPEN; - --- Save open state so PDB opens automatically on CDB restart -ALTER PLUGGABLE DATABASE sales_pdb SAVE STATE; -``` - -### Clone a PDB - -Clone for testing or staging. The source PDB must be in READ ONLY mode during a local clone (or use hot clone in 12.2+). - -```sql --- Hot clone (source stays open, 12.2+) -CREATE PLUGGABLE DATABASE sales_test FROM sales_pdb; -ALTER PLUGGABLE DATABASE sales_test OPEN; -``` - -### Unplug and Plug - -Move a PDB between CDBs by unplugging to an XML manifest and plugging into the target CDB. 
- -```sql --- Unplug from source CDB -ALTER PLUGGABLE DATABASE sales_pdb CLOSE IMMEDIATE; -ALTER PLUGGABLE DATABASE sales_pdb UNPLUG INTO '/tmp/sales_pdb.xml'; -DROP PLUGGABLE DATABASE sales_pdb KEEP DATAFILES; - --- Plug into target CDB -CREATE PLUGGABLE DATABASE sales_pdb USING '/tmp/sales_pdb.xml' - COPY - FILE_NAME_CONVERT = ('/source_path/', '/target_path/'); -ALTER PLUGGABLE DATABASE sales_pdb OPEN; -``` - -### Application Containers - -Application containers (12.2+) let you install shared application objects (tables, PL/SQL, metadata) once and propagate them to all PDBs in the container. - -```sql --- Create application container -CREATE PLUGGABLE DATABASE app_root AS APPLICATION CONTAINER - ADMIN USER app_admin IDENTIFIED BY "AppPass123!"; - --- Install application -ALTER PLUGGABLE DATABASE APPLICATION myapp BEGIN INSTALL '1.0'; --- Create shared objects here (tables, packages, etc.) -ALTER PLUGGABLE DATABASE APPLICATION myapp END INSTALL '1.0'; - --- Sync PDBs to pick up application changes -ALTER SESSION SET CONTAINER = app_pdb1; -ALTER PLUGGABLE DATABASE APPLICATION myapp SYNC; -``` - -### Monitor PDBs - -```sql --- List all PDBs and their status -SELECT pdb_id, pdb_name, status, open_mode FROM CDB_PDBS; - --- Resource usage per PDB -SELECT con_id, pdb_name, - ROUND(allocated_size / 1024 / 1024) AS allocated_mb, - ROUND(used_size / 1024 / 1024) AS used_mb -FROM V$PDBS p JOIN CDB_TABLESPACE_USAGE_METRICS m ON p.con_id = m.con_id; -``` - -## Learn More (Official) - -- Liquibase Oracle Extension: -- Flyway Oracle Support: -- utPLSQL Documentation: -- DBMS_REDEFINITION Reference: -- Edition-Based Redefinition Guide: -- Multitenant Administration Guide: diff --git a/plugins/flow/skills/oracle/references/security.md b/plugins/flow/skills/oracle/references/security.md deleted file mode 100644 index dc9e31c..0000000 --- a/plugins/flow/skills/oracle/references/security.md +++ /dev/null @@ -1,329 +0,0 @@ -# Database Security - -## Overview - -Use this reference 
when managing Oracle privileges, implementing row-level security, encrypting data, configuring auditing, or preventing SQL injection. Security is not a feature you add later — build it into every schema and application design from the start. - -## Privilege Management - -### System vs Object Privileges - -System privileges operate at the database level (`CREATE TABLE`, `SELECT ANY TABLE`). Object privileges operate on specific objects (`SELECT ON hr.employees`). Prefer object privileges because they follow least-privilege. - -```sql --- Object privilege (preferred): specific and revocable -GRANT SELECT, INSERT ON hr.employees TO app_user; - --- System privilege (use with caution) -GRANT CREATE SESSION, CREATE TABLE TO app_user; - --- Revoke -REVOKE INSERT ON hr.employees FROM app_user; -``` - -### Dangerous ANY Privileges to Avoid - -These grant power across ALL schemas. Never grant to application accounts. - -- `SELECT ANY TABLE` — reads every table in the database -- `INSERT ANY TABLE`, `UPDATE ANY TABLE`, `DELETE ANY TABLE` — modifies any data -- `DROP ANY TABLE` — destroys any schema object -- `EXECUTE ANY PROCEDURE` — runs any PL/SQL, including privileged packages -- `ALTER ANY TABLE` — restructures any table -- `CREATE ANY DIRECTORY` — creates OS directory mappings, a path to file system access - -If an application claims it needs an ANY privilege, the application design is wrong. Fix the design. - -### Roles - -Group privileges into roles for manageability. Never grant privileges directly to individual users at scale. 
- -```sql -CREATE ROLE app_readonly; -GRANT SELECT ON hr.employees TO app_readonly; -GRANT SELECT ON hr.departments TO app_readonly; - -GRANT app_readonly TO reporting_user; - --- Verify effective privileges -SELECT * FROM DBA_TAB_PRIVS WHERE grantee = 'APP_READONLY'; -SELECT * FROM DBA_ROLE_PRIVS WHERE grantee = 'REPORTING_USER'; -``` - -## Least-Privilege Analysis with DBMS_PRIVILEGE_CAPTURE - -Stop guessing which privileges an application actually uses. Capture real usage and trim everything else. - -```sql --- Create a capture for a specific role -BEGIN - DBMS_PRIVILEGE_CAPTURE.CREATE_CAPTURE( - name => 'app_priv_audit', - type => DBMS_PRIVILEGE_CAPTURE.G_ROLE, - roles => role_name_list('APP_ROLE') - ); -END; -/ - --- Start capture (run during normal application activity) -EXEC DBMS_PRIVILEGE_CAPTURE.ENABLE_CAPTURE('app_priv_audit'); - --- After sufficient activity, stop and analyze -EXEC DBMS_PRIVILEGE_CAPTURE.DISABLE_CAPTURE('app_priv_audit'); -EXEC DBMS_PRIVILEGE_CAPTURE.GENERATE_RESULT('app_priv_audit'); - --- See which privileges were actually used -SELECT * FROM DBA_USED_PRIVS WHERE capture = 'APP_PRIV_AUDIT'; - --- See which privileges were granted but never used — revoke these -SELECT * FROM DBA_UNUSED_PRIVS WHERE capture = 'APP_PRIV_AUDIT'; -``` - -## Virtual Private Database (VPD / FGAC) - -VPD appends a WHERE clause to every query transparently. Use it when different users must see different rows from the same table, and you cannot trust the application layer to enforce this. - -### Create a Policy Function - -The function returns a predicate string that Oracle appends to every query on the protected table. 
- -```sql -CREATE OR REPLACE FUNCTION vpd_region_filter( - p_schema IN VARCHAR2, - p_table IN VARCHAR2 -) RETURN VARCHAR2 AS - v_region VARCHAR2(100); -BEGIN - -- Read the region from the application context - v_region := SYS_CONTEXT('APP_CTX', 'USER_REGION'); - - -- SYS and schema owner bypass the policy - IF SYS_CONTEXT('USERENV', 'SESSION_USER') IN ('SYS', p_schema) THEN - RETURN NULL; -- no filter - END IF; - - RETURN 'region = ''' || v_region || ''''; -END; -/ -``` - -### Attach the Policy - -```sql -BEGIN - DBMS_RLS.ADD_POLICY( - object_schema => 'SALES', - object_name => 'ORDERS', - policy_name => 'region_isolation', - function_schema => 'SEC_ADMIN', - policy_function => 'vpd_region_filter', - statement_types => 'SELECT, INSERT, UPDATE, DELETE', - policy_type => DBMS_RLS.SHARED_CONTEXT_SENSITIVE - ); -END; -/ -``` - -### Set the Application Context - -```sql --- Create context -CREATE OR REPLACE CONTEXT app_ctx USING sec_admin.set_context_pkg; - --- Package to set context (called at session start) -CREATE OR REPLACE PACKAGE BODY sec_admin.set_context_pkg AS - PROCEDURE set_region(p_region VARCHAR2) IS - BEGIN - DBMS_SESSION.SET_CONTEXT('APP_CTX', 'USER_REGION', p_region); - END; -END; -/ -``` - -### Policy Types - -- `STATIC`: Predicate computed once per parse. Use for predicates that never change within a session. -- `SHARED_CONTEXT_SENSITIVE`: Re-evaluates when the application context changes. Best default choice. -- `DYNAMIC`: Re-evaluates on every execution. Highest overhead; avoid unless necessary. - -## Transparent Data Encryption (TDE) - -TDE encrypts data at rest on disk. Queries work normally — decryption is transparent to the application. - -### Tablespace Encryption (Preferred) - -Encrypt the entire tablespace. This is simpler, faster, and avoids column-level restrictions. 
- -```sql --- Configure the wallet first: create the keystore, open it, then set the master key -ADMINISTER KEY MANAGEMENT CREATE KEYSTORE '/opt/oracle/wallet' IDENTIFIED BY wallet_password; -ADMINISTER KEY MANAGEMENT SET KEYSTORE OPEN IDENTIFIED BY wallet_password; -ADMINISTER KEY MANAGEMENT SET KEY IDENTIFIED BY wallet_password WITH BACKUP; - --- Create encrypted tablespace -CREATE TABLESPACE secure_data - DATAFILE '/opt/oracle/oradata/secure01.dbf' SIZE 500M - ENCRYPTION USING 'AES256' - DEFAULT STORAGE(ENCRYPT); -``` - -### Column Encryption - -Use when only specific columns need protection and tablespace-level encryption is not feasible. - -```sql -ALTER TABLE customers MODIFY (ssn ENCRYPT USING 'AES256' NO SALT); -``` - -**NO SALT** is required if the column is indexed. Salt adds randomness that prevents indexing. - -### Key Rotation - -Rotate the master encryption key periodically without re-encrypting data. - -```sql -ADMINISTER KEY MANAGEMENT SET KEY IDENTIFIED BY wallet_password WITH BACKUP; -``` - -## Unified Auditing - -Unified Auditing consolidates all audit trails into a single location. Use it to track who did what and when. 
- -### Create Audit Policies - -```sql --- Audit all DML on sensitive tables -CREATE AUDIT POLICY sensitive_table_audit - ACTIONS SELECT, INSERT, UPDATE, DELETE - ON hr.employees, hr.salaries; - --- Audit privilege use -CREATE AUDIT POLICY priv_use_audit - PRIVILEGES CREATE TABLE, DROP ANY TABLE, ALTER SYSTEM; - --- Audit logon failures -CREATE AUDIT POLICY logon_audit - ACTIONS LOGON; - --- Condition-based: only audit non-service accounts -CREATE AUDIT POLICY app_audit - ACTIONS ALL - ON hr.employees - WHEN 'SYS_CONTEXT(''USERENV'', ''SESSION_USER'') NOT IN (''SVC_ACCOUNT'')' - EVALUATE PER SESSION; - --- Enable policies -AUDIT POLICY sensitive_table_audit; -AUDIT POLICY logon_audit WHENEVER NOT SUCCESSFUL; -``` - -### Query the Audit Trail - -```sql -SELECT event_timestamp, dbusername, action_name, object_schema, object_name, sql_text -FROM UNIFIED_AUDIT_TRAIL -WHERE event_timestamp > SYSDATE - 1 -ORDER BY event_timestamp DESC; -``` - -### Fine-Grained Auditing (FGA) - -Use DBMS_FGA when you need to audit access to specific columns or rows, not just any access to the table. - -```sql -BEGIN - DBMS_FGA.ADD_POLICY( - object_schema => 'HR', - object_name => 'EMPLOYEES', - policy_name => 'salary_access', - audit_column => 'SALARY', - audit_condition => 'DEPARTMENT_ID = 10', - statement_types => 'SELECT' - ); -END; -/ -``` - -## SQL Injection Prevention - -### Always Use Bind Variables - -Bind variables prevent injection and improve performance through cursor sharing. There is no valid reason to concatenate user input into SQL strings. - -```sql --- CORRECT: bind variable -EXECUTE IMMEDIATE 'SELECT * FROM employees WHERE id = :1' USING p_emp_id; - --- WRONG: concatenation — injectable -EXECUTE IMMEDIATE 'SELECT * FROM employees WHERE id = ' || p_emp_id; -``` - -### DBMS_ASSERT for Dynamic Identifiers - -When you must build dynamic SQL with table or column names (which cannot be bind variables), validate identifiers with DBMS_ASSERT. 
- -```sql --- Validates that the input is a real, existing schema object name -v_table := DBMS_ASSERT.SQL_OBJECT_NAME(p_table_input); - --- Validates as a simple SQL name (no dots, no special chars) -v_column := DBMS_ASSERT.SIMPLE_SQL_NAME(p_column_input); - --- Safe dynamic SQL with validated identifier -EXECUTE IMMEDIATE 'SELECT COUNT(*) FROM ' || v_table INTO v_count; -``` - -### DBMS_ASSERT Functions - -- `ENQUOTE_NAME`: Double-quote wraps an identifier safely. -- `SIMPLE_SQL_NAME`: Rejects anything that is not a valid simple identifier. -- `SQL_OBJECT_NAME`: Validates that the name resolves to an existing object. -- `SCHEMA_NAME`: Validates an existing schema name. - -## Data Redaction with DBMS_REDACT - -Mask sensitive data in query results without changing stored data. The application sees masked values; the data on disk remains intact. - -```sql --- Full redaction: replace entire value -BEGIN - DBMS_REDACT.ADD_POLICY( - object_schema => 'HR', - object_name => 'EMPLOYEES', - column_name => 'SSN', - policy_name => 'redact_ssn', - function_type => DBMS_REDACT.FULL, - expression => 'SYS_CONTEXT(''USERENV'', ''SESSION_USER'') != ''HR_ADMIN''' - ); -END; -/ - --- Partial redaction: show last 4 digits of SSN -BEGIN - DBMS_REDACT.ADD_POLICY( - object_schema => 'HR', - object_name => 'EMPLOYEES', - column_name => 'SSN', - policy_name => 'partial_ssn', - function_type => DBMS_REDACT.PARTIAL, - function_parameters => 'VVVFVVFVVVV,VVV-VV-VVVV,*,1,5', - expression => 'SYS_CONTEXT(''USERENV'', ''SESSION_USER'') != ''HR_ADMIN''' - ); -END; -/ -``` - -### Redaction Types - -- `FULL`: Replaces the entire value with a default (0 for numbers, blank for strings). -- `PARTIAL`: Masks a portion of the value (show last 4 digits, mask the rest). -- `REGEXP`: Apply regex-based masking for complex formats (emails, phone numbers). -- `RANDOM`: Replace with a random value of the same data type. 
- -## Learn More (Official) - -- Oracle Database Security Guide: -- DBMS_RLS Reference: -- Unified Auditing: -- TDE Configuration: -- DBMS_REDACT Reference: diff --git a/plugins/flow/skills/oracle/references/sql_patterns.md b/plugins/flow/skills/oracle/references/sql_patterns.md deleted file mode 100644 index de9e405..0000000 --- a/plugins/flow/skills/oracle/references/sql_patterns.md +++ /dev/null @@ -1,317 +0,0 @@ -# Oracle SQL Patterns - -## Overview - -Use this reference when writing non-trivial Oracle SQL: analytic functions, recursive queries, hierarchical data, pivoting, upserts, flashback queries, or dynamic SQL. Every pattern includes a concrete example and explains why you would reach for it over simpler alternatives. - ---- - -## Analytic / Window Functions - -Analytic functions compute values across a set of rows related to the current row without collapsing the result set. Use them instead of self-joins or correlated subqueries because the optimizer processes the window in a single pass. - -### ROW_NUMBER, RANK, DENSE_RANK - -```sql --- ROW_NUMBER assigns a unique sequential integer per partition. --- Use it when you need exactly one row per group (e.g., latest order per customer). -SELECT customer_id, order_date, total, - ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY order_date DESC) AS rn - FROM orders; - --- RANK leaves gaps after ties (1,1,3). DENSE_RANK does not (1,1,2). --- Use RANK for competition-style ranking; DENSE_RANK when downstream logic --- needs contiguous integers (e.g., "top 3 tiers"). -SELECT product_id, revenue, - RANK() OVER (ORDER BY revenue DESC) AS revenue_rank, - DENSE_RANK() OVER (ORDER BY revenue DESC) AS revenue_dense - FROM product_sales; -``` - -### LAG / LEAD - -```sql --- LAG looks backward; LEAD looks forward. Use them to compare adjacent rows --- without a self-join, which is both clearer and faster. 
-SELECT trade_date, close_price, - LAG(close_price) OVER (ORDER BY trade_date) AS prev_close, - close_price - LAG(close_price) OVER (ORDER BY trade_date) AS daily_change - FROM stock_prices - WHERE ticker = 'ORCL'; -``` - -### Running Totals - -```sql --- A windowed SUM with an ORDER BY clause produces a running total. --- The default frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW. -SELECT txn_date, amount, - SUM(amount) OVER (ORDER BY txn_date) AS running_balance - FROM ledger - WHERE account_id = 1001; -``` - -### NTILE - -```sql --- NTILE distributes rows into N roughly equal buckets. --- Use it for percentile segmentation (quartiles, deciles). -SELECT employee_id, salary, - NTILE(4) OVER (ORDER BY salary) AS salary_quartile - FROM employees; -``` - -### LISTAGG - -```sql --- LISTAGG concatenates values from multiple rows into a single string. --- Add ON OVERFLOW TRUNCATE (12c+) to avoid ORA-01489 on wide result sets. -SELECT department_id, - LISTAGG(last_name, ', ') WITHIN GROUP (ORDER BY last_name) - ON OVERFLOW TRUNCATE '...' AS team_members - FROM employees - GROUP BY department_id; -``` - ---- - -## Common Table Expressions (CTEs) - -### Basic and Chained CTEs - -```sql --- CTEs improve readability by naming intermediate result sets. --- Chain multiple CTEs when the pipeline has distinct logical steps. -WITH active_customers AS ( - SELECT customer_id, email - FROM customers - WHERE status = 'ACTIVE' -), -recent_orders AS ( - SELECT o.customer_id, COUNT(*) AS order_count - FROM orders o - JOIN active_customers ac ON ac.customer_id = o.customer_id - WHERE o.order_date >= ADD_MONTHS(SYSDATE, -3) - GROUP BY o.customer_id -) -SELECT ac.email, NVL(ro.order_count, 0) AS orders_last_90d - FROM active_customers ac - LEFT JOIN recent_orders ro ON ro.customer_id = ac.customer_id; -``` - -### Recursive CTEs - -```sql --- Use recursive CTEs to walk tree/graph structures. 
--- SEARCH BREADTH FIRST produces level-order traversal; DEPTH FIRST produces --- pre-order traversal. Choose based on how you want results sorted. -WITH org_tree (employee_id, manager_id, full_name, lvl) AS ( - SELECT employee_id, manager_id, full_name, 1 - FROM employees - WHERE manager_id IS NULL -- anchor: CEO - UNION ALL - SELECT e.employee_id, e.manager_id, e.full_name, ot.lvl + 1 - FROM employees e - JOIN org_tree ot ON ot.employee_id = e.manager_id -) -SEARCH DEPTH FIRST BY full_name SET order_seq -CYCLE employee_id SET is_cycle TO 'Y' DEFAULT 'N' -- prevent infinite loops -SELECT LPAD(' ', (lvl - 1) * 2) || full_name AS org_chart - FROM org_tree - WHERE is_cycle = 'N' - ORDER BY order_seq; -``` - -- **SEARCH DEPTH FIRST** — produces a tree listing where children appear immediately after their parent. -- **SEARCH BREADTH FIRST** — produces level-by-level output (all directors, then all managers, then all ICs). -- **CYCLE** — detects loops in dirty data. Without it, a circular manager reference causes ORA-32044. - ---- - -## Hierarchical Queries (CONNECT BY) - -CONNECT BY is Oracle's original tree-walking syntax. Prefer recursive CTEs for new code because they are ANSI-standard and composable with other CTEs, but understand CONNECT BY for legacy codebases. - -```sql --- Walk an org chart with CONNECT BY. --- PRIOR marks the recursive join direction: parent → child. -SELECT LEVEL AS depth, - SYS_CONNECT_BY_PATH(full_name, ' > ') AS path, - employee_id, manager_id - FROM employees - START WITH manager_id IS NULL - CONNECT BY PRIOR employee_id = manager_id - ORDER SIBLINGS BY full_name; - --- CONNECT_BY_ISLEAF = 1 identifies terminal nodes (no subordinates). --- CONNECT_BY_ROOT returns the anchor row's column value. -``` - -**When to use CONNECT BY over recursive CTE:** only when you need `SYS_CONNECT_BY_PATH`, `CONNECT_BY_ROOT`, or `CONNECT_BY_ISLEAF` and rewriting them as CTE window functions is not worth the effort. 
- ---- - -## PIVOT / UNPIVOT - -### PIVOT - -```sql --- PIVOT rotates rows into columns. Use it to transform normalized data --- into a cross-tab report without writing CASE expressions by hand. -SELECT * - FROM (SELECT department_id, job_id, salary FROM employees) - PIVOT ( - SUM(salary) - FOR job_id IN ('SA_REP' AS sales_rep, - 'SA_MAN' AS sales_mgr, - 'IT_PROG' AS it_prog) - ); -``` - -### UNPIVOT - -```sql --- UNPIVOT does the reverse: columns become rows. --- Use it when importing wide-format data that needs normalization. -SELECT department_id, job_role, total_salary - FROM dept_salary_summary - UNPIVOT ( - total_salary FOR job_role IN (sales_rep, sales_mgr, it_prog) - ); -``` - ---- - -## MERGE (Upsert) - -```sql --- MERGE atomically inserts or updates based on a join condition. --- The optional DELETE WHERE clause removes matched rows that no longer --- qualify after the update — useful for soft-delete or expiry logic. -MERGE INTO inventory tgt -USING (SELECT product_id, qty, warehouse_id FROM staging) src - ON (tgt.product_id = src.product_id AND tgt.warehouse_id = src.warehouse_id) - WHEN MATCHED THEN - UPDATE SET tgt.qty = tgt.qty + src.qty, tgt.last_updated = SYSDATE - DELETE WHERE tgt.qty <= 0 -- remove depleted stock - WHEN NOT MATCHED THEN - INSERT (product_id, warehouse_id, qty, last_updated) - VALUES (src.product_id, src.warehouse_id, src.qty, SYSDATE); -``` - -**Why MERGE over INSERT + UPDATE:** a single MERGE scans the target once instead of twice, reduces round-trips, and guarantees atomicity without explicit locking. - ---- - -## MODEL Clause - -The MODEL clause treats query results as a spreadsheet where you reference cells by dimension and apply iterative rules. Use it for forecasting, allocation, or any calculation that references other rows by position. - -```sql --- Forecast next-quarter revenue using a simple growth multiplier. 
-SELECT quarter, region, revenue - FROM quarterly_sales - MODEL - PARTITION BY (region) - DIMENSION BY (quarter) - MEASURES (revenue) - RULES ( - revenue['Q1-2027'] = revenue['Q4-2026'] * 1.05, - revenue['Q2-2027'] = revenue['Q1-2027'] * 1.03 - ) - ORDER BY region, quarter; -``` - -**When MODEL is appropriate:** inter-row calculations that depend on computed values from other rules (cascading formulas). For simple running totals, use analytic functions instead. - ---- - -## Dynamic SQL - -### EXECUTE IMMEDIATE - -```sql --- Use EXECUTE IMMEDIATE for one-shot dynamic statements. --- Always use bind variables to prevent SQL injection and benefit from --- cursor sharing in the shared pool. -DECLARE - v_table VARCHAR2(128) := 'employees'; - v_count NUMBER; -BEGIN - -- Identifier (table name) cannot be bound; validate it first. - IF NOT REGEXP_LIKE(v_table, '^[A-Za-z_][A-Za-z0-9_#$]*$') THEN - RAISE_APPLICATION_ERROR(-20001, 'Invalid identifier: ' || v_table); - END IF; - - EXECUTE IMMEDIATE - 'SELECT COUNT(*) FROM ' || DBMS_ASSERT.SQL_OBJECT_NAME(v_table) || ' WHERE department_id = :dept' - INTO v_count - USING 10; -END; -/ -``` - -### DBMS_SQL — Parse Once, Execute Many - -```sql --- Use DBMS_SQL when you need to parse a statement once and execute it --- many times with different binds (batch inserts from dynamic sources). -DECLARE - v_cur INTEGER := DBMS_SQL.OPEN_CURSOR; - v_rows INTEGER; -BEGIN - DBMS_SQL.PARSE(v_cur, - 'INSERT INTO audit_log (event_type, payload) VALUES (:etype, :pload)', - DBMS_SQL.NATIVE); - - FOR rec IN (SELECT event_type, payload FROM staging_events) LOOP - DBMS_SQL.BIND_VARIABLE(v_cur, ':etype', rec.event_type); - DBMS_SQL.BIND_VARIABLE(v_cur, ':pload', rec.payload); - v_rows := DBMS_SQL.EXECUTE(v_cur); - END LOOP; - - DBMS_SQL.CLOSE_CURSOR(v_cur); -END; -/ -``` - ---- - -## Flashback Queries - -Flashback queries let you read past versions of data without restoring from backup. 
They rely on undo data, so the retention window depends on `UNDO_RETENTION`. - -### AS OF TIMESTAMP - -```sql --- Retrieve the state of a table at a specific point in time. --- Use this to investigate accidental DML or audit "what changed." -SELECT * FROM orders AS OF TIMESTAMP - TO_TIMESTAMP('2026-03-25 14:00:00', 'YYYY-MM-DD HH24:MI:SS') - WHERE order_id = 9001; -``` - -### VERSIONS BETWEEN - -```sql --- Show all versions of rows that changed within a time range. --- VERSIONS_STARTTIME / VERSIONS_ENDTIME / VERSIONS_OPERATION (I/U/D) --- are pseudo-columns that describe each version. -SELECT order_id, status, total, - VERSIONS_OPERATION AS op, - VERSIONS_STARTTIME AS changed_at - FROM orders VERSIONS BETWEEN TIMESTAMP - TO_TIMESTAMP('2026-03-25 12:00:00', 'YYYY-MM-DD HH24:MI:SS') - AND SYSTIMESTAMP - WHERE order_id = 9001 - ORDER BY VERSIONS_STARTTIME; -``` - ---- - -## Official References - -- Oracle SQL Language Reference: -- Oracle Analytic Functions: -- Oracle Data Warehousing Guide (MODEL clause): -- Flashback Query: diff --git a/plugins/flow/skills/oracle/references/sqlplus.md b/plugins/flow/skills/oracle/references/sqlplus.md deleted file mode 100644 index c1f8e78..0000000 --- a/plugins/flow/skills/oracle/references/sqlplus.md +++ /dev/null @@ -1,264 +0,0 @@ -# SQL*Plus & SQLcl - -## Overview - -Use this reference when working with Oracle's command-line SQL tools. SQL*Plus is the traditional client bundled with every Oracle installation. SQLcl is the modern replacement with built-in Liquibase, JavaScript scripting, and the MCP server that lets AI assistants interact with Oracle databases directly. 
- -## SQL*Plus Essentials - -### Connecting - -```bash -# Basic connection -sqlplus hr/password@hostname:1521/FREEPDB1 - -# As SYSDBA -sqlplus / as sysdba - -# With TNS alias -sqlplus hr/password@mydb - -# Connection with wallet (no password in command) -sqlplus /@mydb_wallet -``` - -### SET Commands - -Configure these at the top of scripts for clean, predictable output. - -```sql -SET LINESIZE 200 -- prevent line wrapping -SET PAGESIZE 50 -- rows per page before headers repeat -SET SERVEROUTPUT ON -- show DBMS_OUTPUT messages -SET TIMING ON -- display elapsed time after each statement -SET FEEDBACK ON -- show "N rows selected" messages -SET TRIMSPOOL ON -- remove trailing blanks in SPOOL output -SET VERIFY OFF -- suppress old/new substitution variable echo -SET ECHO OFF -- suppress command echo in scripts -``` - -### Substitution Variables - -Use `&` for interactive prompts and `DEFINE` for scripted values. - -```sql --- Interactive prompt -SELECT * FROM employees WHERE department_id = &dept_id; - --- Scripted (no prompt) -DEFINE table_name = 'EMPLOYEES' -SELECT COUNT(*) FROM &table_name; - --- Pass from command line --- sqlplus hr/pass@db @myscript.sql EMPLOYEES --- In script: &1 refers to the first argument -SELECT COUNT(*) FROM &1; -``` - -### SPOOL - -Capture output to a file. Always `SPOOL OFF` to flush and close. - -```sql -SPOOL /tmp/report.csv -SELECT employee_id || ',' || last_name || ',' || salary FROM employees; -SPOOL OFF -``` - -### Column Formatting - -```sql -COLUMN employee_name FORMAT A30 -- 30-character string column -COLUMN salary FORMAT 999,999.00 -- numeric with commas and decimals -COLUMN hire_date FORMAT A12 -- date column width -``` - -## SQLcl - -SQLcl replaces SQL*Plus for modern workflows. It runs on Java and requires no Oracle Client installation. - -### Installation - -Download from Oracle or install via package manager. Requires Java 11+. 
- -```bash -# After download and extract -export PATH=$PATH:/opt/sqlcl/bin -sql hr/password@localhost:1521/FREEPDB1 -``` - -### Key Differences from SQL*Plus - -- Tab completion for table names, column names, and SQL keywords. -- Built-in `HISTORY` command with search. -- `INFO` command to describe objects with more detail than `DESC`. -- `ALIAS` command to create reusable SQL shortcuts. -- `DDL` command to generate DDL for any object. -- Inline editing with `ED` launches a real editor. -- Built-in Liquibase and JavaScript engines. - -### Output Formats - -Switch output format without changing the query. This matters for automation and data exchange. - -```sql -SET SQLFORMAT CSV -- comma-separated -SET SQLFORMAT JSON -- JSON array of objects -SET SQLFORMAT XML -- XML output -SET SQLFORMAT ANSICONSOLE -- colored, auto-sized terminal table -SET SQLFORMAT INSERT -- generates INSERT statements -SET SQLFORMAT LOADER -- pipe-delimited for SQL*Loader -SET SQLFORMAT DEFAULT -- reset to standard -``` - -### LOAD Command - -Import data directly from CSV without SQL*Loader configuration files. - -```sql --- Load CSV into existing table -LOAD TABLE employees /path/to/employees.csv - --- With explicit delimiter -LOAD TABLE employees /path/to/data.tsv DELIMITER '\t' -``` - -### INFO Command - -```sql --- Detailed object information -INFO employees -INFO+ employees -- extended: indexes, constraints, column stats -``` - -## SQLcl Liquibase Integration - -SQLcl ships with Liquibase built in. Use it to version-control schema changes without installing Liquibase separately. 
- -```sql --- Generate changelog from existing schema -lb generate-schema -split - --- Generate changelog for specific object types -lb generate-changelog -object-type TABLE,INDEX,SEQUENCE - --- Apply changes -lb update -changelog-file controller.xml - --- Rollback last N changesets -lb rollback -count 3 - --- Show pending changes -lb status -changelog-file controller.xml - --- Diff two schemas -lb diff -reference-url jdbc:oracle:thin:@host:1521/pdb -reference-username hr2 -``` - -### Why Use SQLcl Liquibase Over Standalone - -- No separate Java/Liquibase install needed. -- Oracle-aware: understands PL/SQL block delimiters, Oracle DDL quirks. -- Direct wallet and TNS integration for secure connections. -- The `generate-schema` command reverse-engineers a full schema into versioned changelogs. - -## SQLcl MCP Server - -SQLcl 25.2+ includes an MCP (Model Context Protocol) server that lets AI assistants connect to and query Oracle databases. This is directly relevant for AI-assisted database development. - -### Start the MCP Server - -```bash -# Start SQLcl in MCP server mode -sql -mcp - -# With explicit connection -sql -mcp hr/password@localhost:1521/FREEPDB1 -``` - -### Configuration for AI Assistants - -Add to your assistant's MCP configuration: - -```json -{ - "mcpServers": { - "oracle-sqlcl": { - "command": "sql", - "args": ["-mcp"], - "env": { - "TNS_ADMIN": "/path/to/wallet" - } - } - } -} -``` - -### Capabilities Exposed via MCP - -- Execute SQL queries and PL/SQL blocks. -- Describe database objects (tables, views, procedures). -- Browse schema metadata. -- Generate DDL. -- Run Liquibase operations. - -### Security Considerations - -- The MCP server runs with the privileges of the connected database user. Use a restricted account. -- Prefer wallet-based authentication to avoid credentials in config files. -- Consider read-only users for exploratory/assistant use cases. 
- -## JavaScript Scripting - -SQLcl embeds the Nashorn/GraalVM JavaScript engine for complex automation beyond SQL. - -```javascript -// Run with: script /path/to/myscript.js -var conn = sqlcl.getConnection(); -var stmt = conn.createStatement(); -var rs = stmt.executeQuery("SELECT table_name FROM user_tables"); - -while (rs.next()) { - ctx.write(rs.getString("TABLE_NAME") + "\n"); -} - -rs.close(); -stmt.close(); -``` - -Use JavaScript when you need conditional logic, looping with state, or output formatting beyond what SQL and PL/SQL offer in a CLI context. - -## Headless and CI Patterns - -Run SQL*Plus and SQLcl non-interactively in pipelines. - -```bash -# SQL*Plus: pipe script, exit on error -echo "EXIT SQL.SQLCODE" >> myscript.sql -sqlplus -S hr/password@db @myscript.sql -if [ $? -ne 0 ]; then echo "SQL failed"; exit 1; fi - -# SQLcl: same pattern, richer exit control -sql -S hr/password@db @myscript.sql - -# SQLcl with JSON output for parsing in CI -sql -S hr/password@db <<'EOF' -SET SQLFORMAT JSON -SELECT status, COUNT(*) AS cnt FROM orders GROUP BY status; -EXIT -EOF -``` - -### CI Best Practices - -- Always use `-S` (silent) mode to suppress banners and prompts. -- Append `EXIT SQL.SQLCODE` or `WHENEVER SQLERROR EXIT SQL.SQLCODE` to fail pipelines on errors. -- Use `WHENEVER OSERROR EXIT FAILURE` to catch OS-level failures. -- Capture SPOOL output for test evidence and audit trails. - -## Learn More (Official) - -- SQL*Plus User's Guide: -- SQLcl Documentation: -- SQLcl Downloads: -- SQLcl Liquibase: diff --git a/plugins/flow/skills/oracle/references/vectors.md b/plugins/flow/skills/oracle/references/vectors.md deleted file mode 100644 index 658403d..0000000 --- a/plugins/flow/skills/oracle/references/vectors.md +++ /dev/null @@ -1,275 +0,0 @@ -# Oracle AI Vector Search - -## Overview - -Oracle Database 23ai introduced native vector support for AI-powered similarity search. 
The VECTOR data type, distance functions, and approximate nearest neighbor (ANN) indexes enable embedding storage and retrieval directly inside the database — no external vector database required. - -## VECTOR Data Type - -```sql --- Fixed-dimension vector (recommended when dimension is known) -CREATE TABLE documents ( - id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - title VARCHAR2(200), - content CLOB, - embedding VECTOR(1536, FLOAT32) -- 1536 dimensions, 32-bit floats -); - --- Flexible-dimension vector -CREATE TABLE multi_model_embeddings ( - id NUMBER PRIMARY KEY, - model_name VARCHAR2(100), - embedding VECTOR(*, FLOAT32) -- any dimension -); - --- Supported element types --- FLOAT32 (default, best compatibility) --- FLOAT64 (double precision, rarely needed) --- INT8 (quantized, 4x smaller, slight quality loss) --- BINARY (bit-packed, 32x smaller, for binary embeddings) -``` - -### Inserting Vectors - -```sql --- From a literal array -INSERT INTO documents (title, embedding) -VALUES ('Oracle AI', VECTOR('[0.1, 0.2, 0.3, ...]', 1536, FLOAT32)); - --- From Python with python-oracledb -import oracledb -import numpy as np - -embedding = np.random.rand(1536).astype(np.float32) -cursor.execute( - "INSERT INTO documents (title, embedding) VALUES (:1, :2)", - ["My Document", embedding.tobytes()] -) -``` - -## Distance Functions - -Oracle supports multiple distance metrics for similarity comparison: - -```sql --- Cosine distance (most common for text embeddings, normalized vectors) -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, COSINE) AS distance -FROM documents -ORDER BY distance -FETCH FIRST 10 ROWS ONLY; - --- Euclidean (L2) distance -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, EUCLIDEAN) AS distance -FROM documents -ORDER BY distance -FETCH FIRST 10 ROWS ONLY; - --- Dot product (for unnormalized vectors, higher = more similar) -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, DOT) AS similarity -FROM documents -ORDER BY similarity DESC 
-FETCH FIRST 10 ROWS ONLY; - --- Manhattan (L1) distance -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, MANHATTAN) AS distance -FROM documents -ORDER BY distance -FETCH FIRST 10 ROWS ONLY; - --- Hamming distance (for BINARY vectors) -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, HAMMING) AS distance -FROM documents -ORDER BY distance -FETCH FIRST 10 ROWS ONLY; -``` - -### Shorthand Operators - -```sql --- Cosine distance shorthand (< = > operator) -SELECT title FROM documents -ORDER BY embedding <=> :query_vec -FETCH FIRST 10 ROWS ONLY; -``` - -## Vector Indexes (ANN) - -Approximate Nearest Neighbor indexes trade slight accuracy for dramatically faster search at scale. - -### IVF (Inverted File) Index - -Partitions vectors into clusters. Best for datasets where you can tolerate some accuracy loss for speed. - -```sql --- Create IVF index -CREATE VECTOR INDEX doc_embed_ivf_idx ON documents (embedding) -ORGANIZATION NEIGHBOR PARTITIONS -WITH DISTANCE COSINE -PARAMETERS (TYPE IVF, NEIGHBOR PARTITIONS 64); - --- Query with ANN (uses index automatically when available) -SELECT title, VECTOR_DISTANCE(embedding, :query_vec, COSINE) AS dist -FROM documents -ORDER BY dist -FETCH APPROXIMATE FIRST 10 ROWS ONLY; -``` - -### HNSW (Hierarchical Navigable Small World) Index - -Graph-based index with better recall than IVF at similar speed. Preferred for most use cases. - -```sql -CREATE VECTOR INDEX doc_embed_hnsw_idx ON documents (embedding) -ORGANIZATION INMEMORY NEIGHBOR GRAPH -WITH DISTANCE COSINE -PARAMETERS (TYPE HNSW, M 16, EFCONSTRUCTION 200); - --- HNSW parameters: --- M : max connections per node (higher = better recall, more memory). Default 16. --- EFCONSTRUCTION : search width during build (higher = better index quality, slower build). Default 200. 
-``` - -### Choosing Between IVF and HNSW - -| Factor | IVF | HNSW | -|--------|-----|------| -| Build speed | Faster | Slower | -| Query speed | Fast | Faster | -| Recall accuracy | Good (~90-95%) | Better (~95-99%) | -| Memory usage | Lower | Higher | -| Update cost | Requires rebuild | Incremental | -| Best for | Large static datasets | Dynamic datasets, high recall needs | - -## Embedding Generation (In-Database) - -Oracle 23ai can generate embeddings directly using DBMS_VECTOR: - -```sql --- Generate embedding using a configured model -SELECT DBMS_VECTOR.UTL_TO_EMBEDDING( - 'What is Oracle AI Vector Search?', - JSON('{"provider": "ocigenai", "model": "cohere.embed-english-v3.0"}') -) AS embedding -FROM DUAL; - --- Batch embedding generation -INSERT INTO documents (title, content, embedding) -SELECT title, content, - DBMS_VECTOR.UTL_TO_EMBEDDING(content, - JSON('{"provider": "ocigenai", "model": "cohere.embed-english-v3.0"}')) -FROM source_documents; -``` - -### Supported Providers - -- **OCI GenAI**: Cohere, Meta Llama embeddings -- **OpenAI**: text-embedding-ada-002, text-embedding-3-small/large -- **Cohere**: embed-english-v3.0, embed-multilingual-v3.0 -- **Custom**: ONNX models loaded into the database - -### Configuring Credentials - -```sql --- Create credential for OCI GenAI -BEGIN - DBMS_CLOUD.CREATE_CREDENTIAL( - credential_name => 'OCI_GENAI_CRED', - user_ocid => 'ocid1.user.oc1...', - tenancy_ocid => 'ocid1.tenancy.oc1...', - private_key => '', - fingerprint => '...' 
- ); -END; -/ - --- Create credential for OpenAI -BEGIN - DBMS_CLOUD.CREATE_CREDENTIAL( - credential_name => 'OPENAI_CRED', - username => 'OPENAI', - password => '' - ); -END; -/ -``` - -## RAG (Retrieval-Augmented Generation) Pattern - -Combine vector search with LLM generation entirely in SQL: - -```sql --- Step 1: Find relevant context via vector search -WITH context AS ( - SELECT content, - VECTOR_DISTANCE(embedding, :query_embedding, COSINE) AS distance - FROM knowledge_base - ORDER BY distance - FETCH APPROXIMATE FIRST 5 ROWS ONLY -) --- Step 2: Generate answer using retrieved context -SELECT DBMS_VECTOR_CHAIN.UTL_TO_GENERATE_TEXT( - 'Answer the question based on the context below.' || CHR(10) || - 'Context: ' || LISTAGG(content, CHR(10)) WITHIN GROUP (ORDER BY distance) || CHR(10) || - 'Question: ' || :user_question, - JSON('{"provider": "ocigenai", "model": "cohere.command-r-plus"}') -) AS answer -FROM context; -``` - -## Hybrid Search - -Combine vector similarity with traditional SQL filtering for more precise results: - -```sql --- Vector search with metadata filters -SELECT d.title, d.category, - VECTOR_DISTANCE(d.embedding, :query_vec, COSINE) AS distance -FROM documents d -WHERE d.category = 'technical' - AND d.created_at > SYSDATE - 90 - AND d.is_published = 1 -ORDER BY distance -FETCH APPROXIMATE FIRST 10 ROWS ONLY; - --- Combined full-text + vector search -SELECT d.title, - VECTOR_DISTANCE(d.embedding, :query_vec, COSINE) AS vec_dist, - SCORE(1) AS text_score -FROM documents d -WHERE CONTAINS(d.content, :text_query, 1) > 0 -ORDER BY (0.7 * (1 - vec_dist) + 0.3 * SCORE(1)) DESC -FETCH FIRST 10 ROWS ONLY; -``` - -## Vector Utilities - -```sql --- Get vector dimension -SELECT VECTOR_DIMENSION_COUNT(embedding) FROM documents WHERE ROWNUM = 1; - --- Get vector norm (magnitude) -SELECT VECTOR_NORM(embedding) FROM documents WHERE ROWNUM = 1; - --- Convert between formats -SELECT TO_VECTOR('[1.0, 2.0, 3.0]', 3, FLOAT32) FROM DUAL; - --- Serialize vector to 
string -SELECT FROM_VECTOR(embedding) FROM documents WHERE ROWNUM = 1; -``` - -## Best Practices - -- **Normalize vectors** before storage if using cosine distance — pre-normalized vectors make cosine equivalent to dot product, which is faster -- **Choose dimension wisely**: 1536 (OpenAI ada-002) or 1024 (Cohere v3) are common. Smaller dimensions (384-768) trade accuracy for speed and storage -- **Use INT8 quantization** for large-scale deployments where slight recall loss is acceptable — reduces storage by 4x -- **Create HNSW indexes** for most use cases; use IVF only when memory is constrained -- **Batch insert** embeddings rather than one-at-a-time for throughput -- **Monitor recall**: compare ANN results (FETCH APPROXIMATE) against exact search periodically to validate index quality -- **Partition large tables** by category or date, with per-partition vector indexes for faster pruning - -## Official References - -- Oracle AI Vector Search Guide: -- DBMS_VECTOR Package: -- VECTOR Data Type: -- Oracle AI Vector Search Blog: diff --git a/plugins/flow/skills/podman/SKILL.md b/plugins/flow/skills/podman/SKILL.md deleted file mode 100644 index 43c0514..0000000 --- a/plugins/flow/skills/podman/SKILL.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -name: podman -description: "Use when running Podman, editing Containerfile, managing rootless containers, pods, podman-compose, systemd services, OCI images, secrets, or Docker-compatible workflows without a Docker daemon." ---- - -# Podman - -## Overview - -Podman is a daemonless, rootless container engine compatible with OCI images and the Docker CLI. It supports pod-level grouping, systemd integration via Quadlet, and secure secret management. - ---- - - - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Usage & Commands](references/usage.md)** - - Core commands (run, build, exec, ps), rootless mode, pod creation, volume mounts, networking. 
-- **[Systemd Integration](references/systemd.md)** - - Quadlet/systemd integration, auto-start containers, podman generate systemd. -- **[Secret Management](references/secrets.md)** - - Secret management (podman secret create), secure credential handling. - - - ---- - -## Official References - -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Docker](https://github.com/cofin/flow/blob/main/templates/styleguides/tools/docker.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. - - - -## Example - -Add example instructions here. - diff --git a/plugins/flow/skills/podman/agents/openai.yaml b/plugins/flow/skills/podman/agents/openai.yaml deleted file mode 100644 index c6dda58..0000000 --- a/plugins/flow/skills/podman/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Podman" - short_description: "Rootless containers, pods, systemd units, secrets, and Docker compatibility" diff --git a/plugins/flow/skills/podman/references/secrets.md b/plugins/flow/skills/podman/references/secrets.md deleted file mode 100644 index 4032cc6..0000000 --- a/plugins/flow/skills/podman/references/secrets.md +++ /dev/null @@ -1,128 +0,0 @@ -# Podman Secret Management - -## Creating Secrets - -```bash -# From a file -echo -n "my-database-password" | podman secret create db_password - - -# From a file path -podman secret create tls_cert ./server.crt - -# From environment variable -printf '%s' "${DB_PASSWORD}" | podman secret create db_password - -``` - -## Listing & Inspecting Secrets - -```bash -# List all secrets -podman secret ls - -# Inspect secret metadata (does not show the value) -podman secret inspect db_password - 
-# Remove a secret -podman secret rm db_password -``` - -## Using Secrets in Containers - -```bash -# Mount secret as a file (default: /run/secrets/) -podman run -d \ - --secret db_password \ - --name myapp \ - myimage:latest - -# Inside the container, read: cat /run/secrets/db_password - -# Mount at a custom path -podman run -d \ - --secret db_password,target=/etc/myapp/db-pass \ - myimage:latest - -# Expose as environment variable -podman run -d \ - --secret db_password,type=env,target=DATABASE_PASSWORD \ - myimage:latest -``` - -## Secrets in Quadlet - -```ini -# ~/.config/containers/systemd/myapp.container -[Container] -Image=myimage:latest -Secret=db_password,type=mount,target=/run/secrets/db_password -Secret=api_key,type=env,target=API_KEY -``` - -## Secrets in Podman Compose - -```yaml -# docker-compose.yml (podman-compose compatible) -services: - api: - image: myapi:latest - secrets: - - db_password - - api_key - -secrets: - db_password: - external: true # Must be created with `podman secret create` first - api_key: - file: ./secrets/api_key.txt # Created from file -``` - -## Application Pattern: Reading Secrets - -```python -# Python example: read secret from file -import os -from pathlib import Path - -def get_secret(name: str) -> str: - """Read a secret from /run/secrets/ or fall back to env var.""" - secret_path = Path(f"/run/secrets/{name}") - if secret_path.exists(): - return secret_path.read_text().strip() - value = os.environ.get(name.upper()) - if value is None: - raise ValueError(f"Secret '{name}' not found in /run/secrets/ or environment") - return value - -db_password = get_secret("db_password") -``` - -```go -// Go example: read secret from file -func getSecret(name string) (string, error) { - path := filepath.Join("/run/secrets", name) - data, err := os.ReadFile(path) - if err != nil { - // Fall back to environment variable - if val, ok := os.LookupEnv(strings.ToUpper(name)); ok { - return val, nil - } - return "", fmt.Errorf("secret %q not 
found", name) - } - return strings.TrimSpace(string(data)), nil -} -``` - -## Security Best Practices - -1. **Never bake secrets into images** -- use runtime secret injection. -2. **Prefer file-based secrets over environment variables** -- env vars appear in `podman inspect` and process listings. -3. **Use `printf '%s'` instead of `echo`** -- avoids newline issues and shell history with some shells. -4. **Rotate secrets** by creating a new secret and restarting the container: - -```bash -echo -n "new-password" | podman secret create db_password_v2 - -podman stop myapp -podman rm myapp -podman run -d --secret db_password_v2,target=/run/secrets/db_password --name myapp myimage -podman secret rm db_password # Remove old secret -``` diff --git a/plugins/flow/skills/podman/references/systemd.md b/plugins/flow/skills/podman/references/systemd.md deleted file mode 100644 index a026d68..0000000 --- a/plugins/flow/skills/podman/references/systemd.md +++ /dev/null @@ -1,199 +0,0 @@ -# Podman Systemd Integration - -## Quadlet (Recommended, Podman 4.4+) - -Quadlet lets you define containers as systemd unit files. 
Place `.container` files in: - -- User: `~/.config/containers/systemd/` -- System: `/etc/containers/systemd/` - -### Container Unit - -```ini -# ~/.config/containers/systemd/myapp.container -[Unit] -Description=My Application -After=network-online.target - -[Container] -Image=docker.io/myorg/myapp:latest -PublishPort=8080:8080 -Environment=NODE_ENV=production -Volume=myapp-data:/data:Z -Network=myapp.network -AutoUpdate=registry - -[Service] -Restart=on-failure -RestartSec=10 -TimeoutStartSec=60 - -[Install] -WantedBy=default.target -``` - -### Network Unit - -```ini -# ~/.config/containers/systemd/myapp.network -[Network] -Subnet=10.89.0.0/24 -Gateway=10.89.0.1 -``` - -### Volume Unit - -```ini -# ~/.config/containers/systemd/myapp-data.volume -[Volume] -Label=app=myapp -``` - -### Pod Unit - -```ini -# ~/.config/containers/systemd/myapp.pod -[Pod] -PodName=myapp -PublishPort=8080:8080 -PublishPort=5432:5432 -``` - -### Activate Quadlet Units - -```bash -# Reload systemd to discover new units -systemctl --user daemon-reload - -# Start and enable -systemctl --user start myapp.service -systemctl --user enable myapp.service - -# Check status -systemctl --user status myapp.service - -# View logs -journalctl --user -u myapp.service -f -``` - -## Legacy: podman generate systemd - -For Podman < 4.4 or one-off conversions: - -```bash -# Generate systemd unit from running container -podman generate systemd --name myapp --new --files - -# Install for current user -mkdir -p ~/.config/systemd/user -cp container-myapp.service ~/.config/systemd/user/ -systemctl --user daemon-reload -systemctl --user enable --now container-myapp.service -``` - -### Generated Unit Example - -```ini -# container-myapp.service -[Unit] -Description=Podman container-myapp.service -Wants=network-online.target -After=network-online.target - -[Service] -Environment=PODMAN_SYSTEMD_UNIT=%n -Restart=on-failure -TimeoutStopSec=70 -ExecStartPre=/bin/rm -f %t/%n.ctr-id -ExecStart=/usr/bin/podman run \ - 
--cidfile=%t/%n.ctr-id \ - --cgroups=no-conmon \ - --rm \ - --sdnotify=conmon \ - -d \ - --replace \ - --name myapp \ - -p 8080:8080 \ - myimage:latest -ExecStop=/usr/bin/podman stop --ignore --cidfile=%t/%n.ctr-id -ExecStopPost=/usr/bin/podman rm -f --ignore --cidfile=%t/%n.ctr-id -Type=notify -NotifyAccess=all - -[Install] -WantedBy=default.target -``` - -## Auto-Updates - -```bash -# Enable auto-update timer -systemctl --user enable --now podman-auto-update.timer - -# Check for updates manually -podman auto-update - -# Dry run -podman auto-update --dry-run -``` - -Requires `AutoUpdate=registry` in the Quadlet `.container` file or the `io.containers.autoupdate=registry` label. - -## Enabling Lingering (User Services Without Login) - -```bash -# Allow user services to run after logout -loginctl enable-linger $USER - -# Verify -loginctl show-user $USER | grep Linger -``` - -## Full Stack Example - -```ini -# ~/.config/containers/systemd/webapp.network -[Network] -Subnet=10.89.1.0/24 - -# ~/.config/containers/systemd/db.container -[Unit] -Description=PostgreSQL Database - -[Container] -Image=docker.io/library/postgres:16 -Environment=POSTGRES_PASSWORD=secret -Environment=POSTGRES_DB=myapp -Volume=db-data:/var/lib/postgresql/data:Z -Network=webapp.network -HealthCmd=pg_isready -U postgres -HealthInterval=10s - -[Service] -Restart=always - -[Install] -WantedBy=default.target - -# ~/.config/containers/systemd/api.container -[Unit] -Description=API Server -After=db.service - -[Container] -Image=docker.io/myorg/api:latest -Environment=DATABASE_URL=postgresql://postgres:secret@systemd-db:5432/myapp -PublishPort=8080:8080 -Network=webapp.network - -[Service] -Restart=on-failure - -[Install] -WantedBy=default.target -``` - -```bash -systemctl --user daemon-reload -systemctl --user start db.service api.service -``` diff --git a/plugins/flow/skills/podman/references/usage.md b/plugins/flow/skills/podman/references/usage.md deleted file mode 100644 index 4c90ad3..0000000 --- 
a/plugins/flow/skills/podman/references/usage.md +++ /dev/null @@ -1,185 +0,0 @@ -# Podman Usage & Commands - -## Core Commands - -```bash -# Run a container (Docker-compatible syntax) -podman run -d --name myapp -p 8080:8080 myimage:latest - -# Build an image -podman build -t myapp:latest . - -# List running containers -podman ps - -# List all containers (including stopped) -podman ps -a - -# Execute command in running container -podman exec -it myapp /bin/sh - -# View logs -podman logs -f myapp - -# Stop and remove -podman stop myapp -podman rm myapp - -# Remove all stopped containers -podman container prune -``` - -## Rootless Mode - -Podman runs rootless by default for the current user. No daemon required. - -```bash -# Check rootless mode -podman info --format '{{.Host.Security.Rootless}}' - -# Rootless containers use user namespaces -# UID mapping is configured in /etc/subuid and /etc/subgid -cat /etc/subuid -# username:100000:65536 - -# Run as specific user inside container -podman run --user 1001:1001 myimage - -# Use --userns=keep-id to map host UID to container UID -podman run --userns=keep-id -v ./data:/data myimage -``` - -## Pod Management - -Pods group containers that share network and IPC namespaces (like Kubernetes pods). 
- -```bash -# Create a pod -podman pod create --name myapp-pod -p 8080:8080 -p 5432:5432 - -# Add containers to the pod -podman run -d --pod myapp-pod --name api myapi:latest -podman run -d --pod myapp-pod --name db postgres:16 - -# Containers in the same pod communicate via localhost -# API connects to DB at localhost:5432 - -# List pods -podman pod ls - -# Stop/start all containers in a pod -podman pod stop myapp-pod -podman pod start myapp-pod - -# Remove pod and all its containers -podman pod rm -f myapp-pod -``` - -## Volume Mounts - -```bash -# Named volume -podman volume create mydata -podman run -v mydata:/data myimage - -# Bind mount (use :Z for SELinux relabeling) -podman run -v ./local-dir:/container-dir:Z myimage - -# Read-only mount -podman run -v ./config:/etc/myapp/config:ro,Z myimage - -# tmpfs mount -podman run --tmpfs /tmp:size=100m myimage -``` - -## Networking - -```bash -# Create a network -podman network create mynet - -# Run containers on the network -podman run -d --network mynet --name api myapi:latest -podman run -d --network mynet --name db postgres:16 - -# Containers resolve each other by name -# api can connect to db:5432 - -# Inspect network -podman network inspect mynet - -# Connect existing container to network -podman network connect mynet existing-container -``` - -## Image Management - -```bash -# Pull image -podman pull docker.io/library/postgres:16 - -# List images -podman images - -# Tag image -podman tag myapp:latest registry.example.com/myapp:v1.0 - -# Push to registry -podman push registry.example.com/myapp:v1.0 - -# Remove unused images -podman image prune -a - -# Export/import images -podman save -o myapp.tar myapp:latest -podman load -i myapp.tar -``` - -## Podman Compose - -```bash -# Install podman-compose -pip install podman-compose - -# Use existing docker-compose.yml -podman-compose up -d -podman-compose down -podman-compose logs -f - -# Or use Docker Compose directly with Podman socket -systemctl --user enable --now 
podman.socket -export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/podman/podman.sock -docker compose up -d # Uses Podman backend -``` - -## Generating Kubernetes YAML - -```bash -# Generate Kubernetes YAML from a pod -podman generate kube myapp-pod > myapp.yaml - -# Play (deploy) Kubernetes YAML -podman kube play myapp.yaml - -# Tear down -podman kube down myapp.yaml -``` - -## Useful Flags - -```bash -# Auto-remove container on exit -podman run --rm myimage - -# Resource limits -podman run --memory=512m --cpus=2 myimage - -# Environment variables -podman run -e KEY=value --env-file=.env myimage - -# Restart policy -podman run --restart=on-failure:3 myimage - -# Init process (reap zombies) -podman run --init myimage -``` diff --git a/plugins/flow/skills/postgres/SKILL.md b/plugins/flow/skills/postgres/SKILL.md deleted file mode 100644 index fe73656..0000000 --- a/plugins/flow/skills/postgres/SKILL.md +++ /dev/null @@ -1,450 +0,0 @@ ---- -name: postgres -description: "Use when writing PostgreSQL SQL, editing .sql files, psql commands, postgresql.conf, psycopg or asyncpg code, indexes, JSONB, PL/pgSQL, extensions, roles, RLS, replication, migrations, or query tuning." ---- - -# PostgreSQL - -PostgreSQL is an advanced open-source relational database with extensive support for SQL standards, JSONB, full-text search, PL/pgSQL, and extensibility. 
- -## Quick Reference - -### Connection Patterns - -```bash -# URI format -"postgresql://app:secret@localhost:5432/mydb?sslmode=require&application_name=myapp" - -# Multiple hosts (failover) -"postgresql://app:secret@primary:5432,standby:5432/mydb?target_session_attrs=read-write" -``` - -```python -# asyncpg (async) -pool = await asyncpg.create_pool("postgresql://app:secret@localhost/mydb", min_size=5, max_size=20) -async with pool.acquire() as conn: - rows = await conn.fetch("SELECT id, name FROM users WHERE status = $1", "active") - -# psycopg v3 (async) -async with await psycopg.AsyncConnection.connect(conninfo) as conn: - async with conn.cursor() as cur: - await cur.execute("SELECT id, name FROM users WHERE id = %s", (42,)) -``` - -### Indexing Essentials - -| Type | Best For | Example | -|------|----------|---------| -| B-tree (default) | Equality, range on scalars | `CREATE INDEX idx ON orders (created_at DESC)` | -| GIN | JSONB, arrays, full-text, trigram | `CREATE INDEX idx ON docs USING gin (data)` | -| GiST | Geometry, range types, nearest-neighbor | `CREATE INDEX idx ON events USING gist (during)` | -| BRIN | Large, naturally ordered (time-series) | `CREATE INDEX idx ON logs USING brin (ts)` | - -**Partial indexes** -- index only the rows that matter: - -```sql -CREATE INDEX idx_orders_active ON orders (user_id) - WHERE status IN ('pending', 'processing'); -``` - -### Key JSONB Patterns - -```sql --- Navigation -SELECT data->>'name' FROM docs; -- text extraction -SELECT data @> '{"status": "active"}' FROM docs; -- containment - --- GIN index for containment -CREATE INDEX idx_docs_data ON docs USING gin (data jsonb_path_ops); - --- Build objects -SELECT jsonb_build_object('id', u.id, 'name', u.name) FROM users u; -``` - -### EXPLAIN Usage - -```sql --- Full diagnostic -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...; - --- Safe for mutating queries (no execution) -EXPLAIN (COSTS, VERBOSE) DELETE FROM orders WHERE created_at < '2020-01-01'; -``` - -| 
Symptom | Likely Cause | Fix | -|---------|-------------|-----| -| `Seq Scan` on large table | Missing/unused index | Create index, check predicate | -| `Sort Method: external merge Disk` | `work_mem` too low | Increase `work_mem` | -| High `Rows Removed by Filter` | Index not selective | Refine index, add partial index | - - - -## Workflow - -### Step 1: Schema Design - -Define tables with appropriate types. Use JSONB for semi-structured data, arrays for small sets, and normalized tables for relational data. Always define primary keys. - -### Step 2: Write Queries - -Use parameterized queries (`$1` for asyncpg, `%s` for psycopg). Use CTEs for readability. Prefer `EXISTS` over `IN` for correlated subqueries. - -### Step 3: Index Strategy - -Start with B-tree indexes on WHERE/JOIN/ORDER BY columns. Use partial indexes to limit index size. Add GIN indexes for JSONB containment queries. Prefer expression indexes for computed predicates. - -### Step 4: Performance Tuning - -Run `EXPLAIN (ANALYZE, BUFFERS)` on slow queries. Check `pg_stat_statements` for top queries by total time. Tune `shared_buffers`, `work_mem`, and autovacuum settings. - -### Step 5: Validate - -Confirm EXPLAIN plans use indexes. Check `pg_stat_user_tables` for sequential scan counts on large tables. Verify connection pooling (pgbouncer) is configured for production. - - - - - -## Guardrails - -- **Always use parameterized queries** -- never interpolate user input. Use `$1` placeholders (asyncpg) or `%s` (psycopg). -- **Prefer partial indexes** -- indexing only relevant rows reduces size and improves write performance. -- **EXPLAIN before optimizing** -- always measure before adding indexes or rewriting queries. Use `EXPLAIN (ANALYZE, BUFFERS)` for real execution stats. -- **Use JSONB, not JSON** -- JSONB is decomposed binary, supports GIN indexing and operators. Plain JSON is only for exact text preservation. -- **Connection pooling in production** -- use pgbouncer or built-in pool. 
Never open unbounded connections from application servers. -- **pg_stat_statements for production monitoring** -- identifies top queries by time, calls, and cache hit ratio. -- **Avoid `SELECT *`** -- name columns to enable covering indexes and prevent schema-change breakage. - - - - - -### Validation Checkpoint - -Before delivering PostgreSQL code, verify: - -- [ ] All queries use parameterized placeholders (no string interpolation) -- [ ] EXPLAIN output confirms index usage for critical queries -- [ ] Partial indexes are used where only a subset of rows is queried -- [ ] JSONB columns use GIN indexes for containment queries -- [ ] Connection pooling is addressed (pgbouncer or pool parameter) -- [ ] sslmode is set to at least `require` for non-local connections - - - - - -## Example - -**Task:** EXPLAIN ANALYZE and index optimization for a slow orders query. - -```sql --- Step 1: Check current plan -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) -SELECT o.id, o.total, o.created_at, u.name - FROM orders o - JOIN users u ON u.id = o.user_id - WHERE o.status = 'pending' - AND o.created_at > NOW() - INTERVAL '7 days' - ORDER BY o.created_at DESC - LIMIT 50; - --- Step 2: If Seq Scan on orders, add a partial composite index -CREATE INDEX CONCURRENTLY idx_orders_pending_recent - ON orders (created_at DESC) - WHERE status = 'pending'; - --- Step 3: Re-run EXPLAIN to confirm Index Scan -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) -SELECT o.id, o.total, o.created_at, u.name - FROM orders o - JOIN users u ON u.id = o.user_id - WHERE o.status = 'pending' - AND o.created_at > NOW() - INTERVAL '7 days' - ORDER BY o.created_at DESC - LIMIT 50; - --- Step 4: Check pg_stat_statements for overall impact -SELECT calls, round(mean_exec_time::numeric, 1) AS mean_ms, query - FROM pg_stat_statements - ORDER BY total_exec_time DESC - LIMIT 10; -``` - - - ---- - -## Monitoring Strategy - -### pg_stat_statements Setup - -Enable in `postgresql.conf` (requires restart): - -```ini 
-shared_preload_libraries = 'pg_stat_statements' -pg_stat_statements.track = all -pg_stat_statements.max = 10000 -``` - -```sql -CREATE EXTENSION IF NOT EXISTS pg_stat_statements; -``` - -### Key pg_stat_statements Queries - -```sql --- Top queries by total execution time -SELECT - round(total_exec_time::numeric, 1) AS total_ms, - calls, - round(mean_exec_time::numeric, 1) AS mean_ms, - round(stddev_exec_time::numeric, 1) AS stddev_ms, - round((100 * total_exec_time / sum(total_exec_time) OVER ())::numeric, 1) AS pct, - left(query, 120) AS query -FROM pg_stat_statements -ORDER BY total_exec_time DESC -LIMIT 20; - --- Top queries by average latency (outliers) -SELECT - calls, - round(mean_exec_time::numeric, 2) AS mean_ms, - left(query, 120) AS query -FROM pg_stat_statements -WHERE calls > 100 -ORDER BY mean_exec_time DESC -LIMIT 20; - --- Cache hit ratio per query -SELECT - calls, - round(100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0), 1) AS cache_hit_pct, - left(query, 120) AS query -FROM pg_stat_statements -ORDER BY shared_blks_read DESC -LIMIT 20; - --- Reset stats -SELECT pg_stat_statements_reset(); -``` - -### Sequential Scan Detection (pg_stat_user_tables) - -```sql --- Tables with high sequential scan counts -SELECT - schemaname, - relname AS table_name, - seq_scan, - seq_tup_read, - idx_scan, - round(100.0 * seq_scan / nullif(seq_scan + idx_scan, 0), 1) AS seq_pct, - n_live_tup -FROM pg_stat_user_tables -WHERE seq_scan > 0 - AND n_live_tup > 10000 -ORDER BY seq_scan DESC -LIMIT 20; -``` - -### Bloat Detection - -```sql --- Table bloat estimate -SELECT - schemaname, - tablename, - pg_size_pretty(pg_total_relation_size(schemaname || '.' 
|| tablename)) AS total_size, - n_dead_tup, - n_live_tup, - round(100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0), 1) AS dead_pct, - last_autovacuum, - last_autoanalyze -FROM pg_stat_user_tables -WHERE n_dead_tup > 1000 -ORDER BY n_dead_tup DESC -LIMIT 20; - --- Index bloat (using pg_relation_size vs estimated used) -SELECT - indexrelname, - pg_size_pretty(pg_relation_size(indexrelid)) AS index_size, - idx_scan, - idx_tup_read, - idx_tup_fetch -FROM pg_stat_user_indexes -ORDER BY pg_relation_size(indexrelid) DESC -LIMIT 20; -``` - -### Active Query Monitoring (pg_stat_activity) - -```sql --- Long-running queries -SELECT - pid, - now() - query_start AS duration, - state, - wait_event_type, - wait_event, - left(query, 100) AS query -FROM pg_stat_activity -WHERE state != 'idle' - AND query_start < now() - INTERVAL '30 seconds' -ORDER BY duration DESC; - --- Blocking and blocked queries -SELECT - blocked.pid AS blocked_pid, - blocking.pid AS blocking_pid, - left(blocked.query, 80) AS blocked_query, - left(blocking.query, 80) AS blocking_query -FROM pg_stat_activity blocked -JOIN pg_stat_activity blocking - ON blocking.pid = ANY(pg_blocking_pids(blocked.pid)) -WHERE cardinality(pg_blocking_pids(blocked.pid)) > 0; - --- Terminate a specific backend (superuser only); replace 12345 with the target pid -SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid = 12345; -``` - ---- - -## Autovacuum Tuning - -### Per-Table Settings - -Override global autovacuum settings for high-churn tables: - -```sql --- High-churn table: trigger vacuum more aggressively -ALTER TABLE orders SET ( - autovacuum_vacuum_scale_factor = 0.01, -- 1% dead tuples (default 20%) - autovacuum_analyze_scale_factor = 0.005, -- 0.5% changed for analyze - autovacuum_vacuum_cost_delay = 2, -- ms; lower = faster vacuum - autovacuum_vacuum_threshold = 50, -- minimum dead tuples before trigger - autovacuum_analyze_threshold = 50 -); - --- Large append-only table: raise threshold to reduce noise -ALTER TABLE events SET ( - 
autovacuum_vacuum_scale_factor = 0.001, - autovacuum_analyze_scale_factor = 0.001 -); -``` - -### Dead Tuple Threshold Formula - -Autovacuum triggers when: - -```text -dead_tuples > autovacuum_vacuum_threshold + autovacuum_vacuum_scale_factor * n_live_tup -``` - -For a 10M-row table at the default `scale_factor=0.20`: - -- Threshold = 50 + 0.20 × 10,000,000 = **2,000,050 dead tuples** before vacuum runs. -- Reduce `scale_factor` to `0.01` for tables with frequent UPDATE/DELETE. - -### Global postgresql.conf Tuning - -```ini -# Reduce I/O impact of autovacuum -autovacuum_vacuum_cost_delay = 2ms # default 2ms since PG 12; was 20ms before -autovacuum_vacuum_cost_limit = 400 # default 200; allows faster passes - -# Scale factor defaults (override per-table for hot tables) -autovacuum_vacuum_scale_factor = 0.05 # default 0.20 -autovacuum_analyze_scale_factor = 0.02 # default 0.10 - -# Worker count -autovacuum_max_workers = 5 # default 3 -``` - ---- - -## Connection Pooling - -### PgBouncer vs pgpool-II - -| Feature | PgBouncer | pgpool-II | -|---------|-----------|-----------| -| Primary purpose | Connection pooling | Pooling + load balancing + HA | -| Modes | Session, Transaction, Statement | Session, Transaction | -| Overhead | Very low (C, single process) | Higher (more features) | -| Read scaling | No built-in | Routes SELECTs to replicas | -| HA / failover | No (use external) | Yes (watchdog, VIP) | -| Complexity | Simple config | More complex | -| Typical use | Application → single primary | Need query routing or HA middleware | - -### PgBouncer Configuration (pgbouncer.ini) - -```ini -[databases] -mydb = host=127.0.0.1 port=5432 dbname=mydb - -[pgbouncer] -listen_port = 6432 -listen_addr = 0.0.0.0 -auth_type = scram-sha-256 -auth_file = /etc/pgbouncer/userlist.txt -pool_mode = transaction ; transaction mode = best performance -max_client_conn = 1000 -default_pool_size = 25 -min_pool_size = 5 -reserve_pool_size = 5 -reserve_pool_timeout = 3 -server_idle_timeout = 600 
-log_connections = 0 -log_disconnections = 0 -``` - -### Transaction vs Session Mode - -| Mode | Behaviour | Use Case | -|------|-----------|----------| -| **Transaction** | Server connection held only during transaction | Stateless apps; highest concurrency | -| **Session** | Server connection held for full client session | Requires session state (temp tables, prepared statements) | -| **Statement** | Released after each statement | Rarely used; autocommit only | - -**Transaction mode caveat:** prepared statements and advisory locks are incompatible with transaction mode. Disable `prepared_statements` at the driver level or use `DEALLOCATE ALL` at transaction end. - ---- - -## Cross-References - -- **Gemini PostgreSQL extension**: `gemini extensions install https://github.com/gemini-cli-extensions/postgresql` — 24 tools for query execution, schema inspection, EXPLAIN analysis, and more. - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Advanced SQL Patterns](references/queries.md)** -- CTEs, window functions, JSONB operations, array ops, lateral joins, recursive queries. -- **[Indexing & Performance](references/indexing.md)** -- Index types (B-tree, GIN, GiST, BRIN), partial indexes, expression indexes. -- **[Administration](references/admin.md)** -- Configuration, roles, connection pooling (pgbouncer), vacuuming, WAL. -- **[psql CLI](references/psql.md)** -- psql commands, \d meta-commands, .psqlrc customization. -- **[PL/pgSQL Development](references/plpgsql.md)** -- Functions, procedures, triggers, exception handling, DO blocks. -- **[Performance Tuning](references/performance.md)** -- EXPLAIN, pg_stat_statements, autovacuum, parallel query. -- **[Connection Patterns](references/connections.md)** -- psycopg v3, asyncpg, SQLAlchemy, node-postgres, Rust sqlx. -- **[JSON/JSONB Patterns](references/json.md)** -- JSONB operators, SQL/JSON path, GIN indexing, generated columns. 
-- **[Security](references/security.md)** -- Role management, RLS, column privileges, SSL/TLS, pgAudit. -- **[Key Extensions](references/extensions.md)** -- PostGIS, pgvector, pg_cron, pg_stat_statements, pg_trgm, TimescaleDB. -- **[Replication & HA](references/replication.md)** -- Streaming replication, logical replication, Patroni, PITR. -- **[Schema Migrations & DevOps](references/migrations.md)** -- Alembic, Flyway, zero-downtime migrations, pgTAP testing. - ---- - -## Official References - -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [PostgreSQL](https://github.com/cofin/flow/blob/main/templates/styleguides/databases/postgres_psql.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/postgres/agents/openai.yaml b/plugins/flow/skills/postgres/agents/openai.yaml deleted file mode 100644 index ee2add2..0000000 --- a/plugins/flow/skills/postgres/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "PostgreSQL" - short_description: "PostgreSQL SQL, psql, migrations, indexing, JSONB, PL/pgSQL, and operations" diff --git a/plugins/flow/skills/postgres/references/admin.md b/plugins/flow/skills/postgres/references/admin.md deleted file mode 100644 index 905781c..0000000 --- a/plugins/flow/skills/postgres/references/admin.md +++ /dev/null @@ -1,171 +0,0 @@ -# PostgreSQL Administration - -## Key Configuration (postgresql.conf) - -```ini -# Memory -shared_buffers = '4GB' # 25% of RAM -effective_cache_size = '12GB' # 75% of RAM -work_mem = '64MB' # Per-sort/hash operation -maintenance_work_mem = '1GB' # VACUUM, CREATE INDEX - -# WAL -wal_level = 'replica' # minimal, replica, logical -max_wal_size = '4GB' -min_wal_size = '1GB' -checkpoint_completion_target = 0.9 - -# Query Planner 
-random_page_cost = 1.1 # SSD (default 4.0 for HDD) -effective_io_concurrency = 200 # SSD - -# Connections -max_connections = 200 -``` - -### Reload vs Restart - -```sql --- Most settings only need reload -SELECT pg_reload_conf(); --- Or: pg_ctl reload -D /var/lib/postgresql/data - --- Some require restart (shared_buffers, max_connections, wal_level, etc.) --- Check: SELECT name, setting, pending_restart FROM pg_settings WHERE pending_restart; -``` - -## Authentication (pg_hba.conf) - -```text -# TYPE DATABASE USER ADDRESS METHOD -local all postgres peer -host all all 127.0.0.1/32 scram-sha-256 -host all all 10.0.0.0/8 scram-sha-256 -host replication replicator 10.0.0.0/8 scram-sha-256 -hostssl all all 0.0.0.0/0 scram-sha-256 -``` - -## Roles & Permissions - -```sql --- Create role with login -CREATE ROLE app_user WITH LOGIN PASSWORD 'secret' VALID UNTIL '2026-12-31'; - --- Read-only role pattern -CREATE ROLE readonly NOLOGIN; -GRANT CONNECT ON DATABASE mydb TO readonly; -GRANT USAGE ON SCHEMA public TO readonly; -GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly; -ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO readonly; - --- Assign to login role -GRANT readonly TO app_reader; - --- Row-level security -ALTER TABLE documents ENABLE ROW LEVEL SECURITY; -CREATE POLICY tenant_isolation ON documents - USING (tenant_id = current_setting('app.tenant_id')::int); -``` - -## Connection Pooling (PgBouncer) - -```ini -; pgbouncer.ini -[databases] -mydb = host=127.0.0.1 port=5432 dbname=mydb - -[pgbouncer] -listen_addr = 0.0.0.0 -listen_port = 6432 -auth_type = scram-sha-256 -auth_file = /etc/pgbouncer/userlist.txt - -; Pool modes: session (default), transaction, statement -pool_mode = transaction -max_client_conn = 1000 -default_pool_size = 25 -min_pool_size = 5 -reserve_pool_size = 5 -``` - -## Vacuuming - -```sql --- Manual vacuum -VACUUM (VERBOSE) my_table; -VACUUM (FULL) my_table; -- Rewrites table, exclusive lock - --- Check autovacuum status 
-SELECT relname, last_vacuum, last_autovacuum, n_dead_tup, n_live_tup -FROM pg_stat_user_tables -ORDER BY n_dead_tup DESC; - --- Tune autovacuum per-table for hot tables -ALTER TABLE hot_table SET ( - autovacuum_vacuum_scale_factor = 0.01, - autovacuum_analyze_scale_factor = 0.005, - autovacuum_vacuum_cost_delay = 10 -); -``` - -## WAL & Replication - -```sql --- Check WAL position -SELECT pg_current_wal_lsn(), pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0') AS wal_bytes; - --- Check replication status (on primary) -SELECT client_addr, state, sent_lsn, write_lsn, replay_lsn, - pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag_bytes -FROM pg_stat_replication; - --- Logical replication -CREATE PUBLICATION my_pub FOR TABLE users, orders; --- On subscriber: -CREATE SUBSCRIPTION my_sub - CONNECTION 'host=primary dbname=mydb' - PUBLICATION my_pub; -``` - -## Backup & Restore - -```bash -# Logical backup -pg_dump -Fc -j4 -d mydb -f mydb.dump -pg_restore -j4 -d mydb mydb.dump - -# Base backup for PITR -pg_basebackup -D /backup/base -Ft -z -P -X stream - -# Point-in-time recovery (recovery.conf / postgresql.conf) -# restore_command = 'cp /backup/wal/%f %p' -# recovery_target_time = '2024-06-15 14:30:00' -``` - -## Useful Diagnostic Queries - -```sql --- Active queries -SELECT pid, now() - pg_stat_activity.query_start AS duration, query, state -FROM pg_stat_activity -WHERE state != 'idle' -ORDER BY duration DESC; - --- Blocking locks -SELECT blocked.pid AS blocked_pid, blocked.query AS blocked_query, - blocking.pid AS blocking_pid, blocking.query AS blocking_query -FROM pg_catalog.pg_locks bl -JOIN pg_stat_activity blocked ON bl.pid = blocked.pid -JOIN pg_catalog.pg_locks kl ON bl.locktype = kl.locktype - AND bl.relation = kl.relation AND bl.pid != kl.pid -JOIN pg_stat_activity blocking ON kl.pid = blocking.pid -WHERE NOT bl.granted; - --- Table sizes -SELECT relname, - pg_size_pretty(pg_total_relation_size(relid)) AS total, - pg_size_pretty(pg_relation_size(relid)) AS 
table_only, - pg_size_pretty(pg_indexes_size(relid)) AS indexes -FROM pg_catalog.pg_statio_user_tables -ORDER BY pg_total_relation_size(relid) DESC; -``` diff --git a/plugins/flow/skills/postgres/references/connections.md b/plugins/flow/skills/postgres/references/connections.md deleted file mode 100644 index 9076b14..0000000 --- a/plugins/flow/skills/postgres/references/connections.md +++ /dev/null @@ -1,331 +0,0 @@ -# Connection Patterns - -## Connection Strings - -```bash -# libpq key-value format -"host=localhost port=5432 dbname=mydb user=app password=secret sslmode=require" - -# URI format -"postgresql://app:secret@localhost:5432/mydb?sslmode=require&application_name=myapp" - -# Multiple hosts (failover) -"postgresql://app:secret@primary:5432,standby:5432/mydb?target_session_attrs=read-write" -``` - -### .pgpass (Password File) - -```bash -# ~/.pgpass (chmod 600) -# hostname:port:database:username:password -localhost:5432:mydb:app:secret -*:5432:*:admin:admin_pass -``` - -### pg_service.conf - -```ini -# ~/.pg_service.conf or /etc/pg_service.conf -[mydb-prod] -host=prod-primary.example.com -port=5432 -dbname=mydb -user=app -sslmode=verify-full - -[mydb-dev] -host=localhost -port=5432 -dbname=mydb_dev -user=dev -``` - -```bash -# Usage -psql "service=mydb-prod" -# Or set PGSERVICE=mydb-prod -``` - -### SSL/TLS Connections - -```bash -# sslmode options (in order of security): -# disable - no SSL -# allow - try non-SSL, fall back to SSL -# prefer - try SSL, fall back to non-SSL (default) -# require - must use SSL, no cert verification -# verify-ca - must use SSL, verify server cert CA -# verify-full - must use SSL, verify server cert CA + hostname - -# Client certificate authentication -"postgresql://app@host/db?sslmode=verify-full&sslcert=/path/client.crt&sslkey=/path/client.key&sslrootcert=/path/ca.crt" -``` - -## Python: psycopg (v3) - -```python -import psycopg -from psycopg.rows import dict_row - -# Simple connection -with 
psycopg.connect("postgresql://app:secret@localhost/mydb") as conn: - with conn.cursor(row_factory=dict_row) as cur: - cur.execute("SELECT id, name FROM users WHERE active = %s", (True,)) - for row in cur: - print(row["name"]) # dict access - -# Connection pool (recommended for web apps) -from psycopg_pool import ConnectionPool - -pool = ConnectionPool( - conninfo="postgresql://app:secret@localhost/mydb", - min_size=5, - max_size=20, - max_idle=300, # seconds before idle connections are closed -) - -with pool.connection() as conn: - result = conn.execute("SELECT * FROM users WHERE id = %s", (42,)).fetchone() - -# Async -import asyncio -from psycopg import AsyncConnection - -async def main(): - async with await AsyncConnection.connect("postgresql://app@localhost/mydb") as conn: - async with conn.cursor(row_factory=dict_row) as cur: - await cur.execute("SELECT * FROM users LIMIT 10") - rows = await cur.fetchall() - -# COPY for bulk data -with pool.connection() as conn: - with conn.cursor() as cur: - # COPY FROM (import) - with cur.copy("COPY users (name, email) FROM STDIN") as copy: - for name, email in data: - copy.write_row((name, email)) - - # COPY TO (export) - with cur.copy("COPY users TO STDOUT (FORMAT CSV, HEADER)") as copy: - for row in copy.rows(): - print(row) - -# Pipeline mode (batch multiple queries, reduce round-trips) -with pool.connection() as conn: - with conn.pipeline(): - conn.execute("INSERT INTO log VALUES (%s, %s)", (1, "a")) - conn.execute("INSERT INTO log VALUES (%s, %s)", (2, "b")) - conn.execute("INSERT INTO log VALUES (%s, %s)", (3, "c")) - # All sent in one network round-trip - -# Binary parameters for performance -with pool.connection() as conn: - cur = conn.execute( - "SELECT id, data FROM large_table WHERE id = ANY(%b)", # %b = binary - ([1, 2, 3],), - ) -``` - -## Python: asyncpg - -```python -import asyncpg -import asyncio - -async def main(): - # Single connection - conn = await asyncpg.connect("postgresql://app@localhost/mydb") - row 
= await conn.fetchrow("SELECT * FROM users WHERE id = $1", 42) - print(row["name"]) # Record access - await conn.close() - - # Connection pool (recommended) - pool = await asyncpg.create_pool( - "postgresql://app@localhost/mydb", - min_size=5, - max_size=20, - command_timeout=30, - ) - - async with pool.acquire() as conn: - # Prepared statements (cached automatically) - rows = await conn.fetch("SELECT * FROM users WHERE active = $1", True) - - # Transactions - async with conn.transaction(): - await conn.execute("UPDATE accounts SET balance = balance - $1 WHERE id = $2", 100, 1) - await conn.execute("UPDATE accounts SET balance = balance + $1 WHERE id = $2", 100, 2) - - # COPY for bulk import - await conn.copy_records_to_table( - "users", - columns=["name", "email"], - records=[("Alice", "alice@ex.com"), ("Bob", "bob@ex.com")], - ) - - # Custom type codecs - await conn.set_type_codec( - "json", encoder=json.dumps, decoder=json.loads, schema="pg_catalog" - ) - - await pool.close() - -asyncio.run(main()) -``` - -## Python: SQLAlchemy - -```python -from sqlalchemy import create_engine, text -from sqlalchemy.orm import Session - -# Sync engine (psycopg) -engine = create_engine( - "postgresql+psycopg://app:secret@localhost/mydb", - pool_size=20, - max_overflow=10, - pool_pre_ping=True, # detect stale connections - pool_recycle=3600, # recycle connections after 1 hour -) - -with Session(engine) as session: - result = session.execute(text("SELECT * FROM users WHERE id = :id"), {"id": 42}) - user = result.mappings().one() - -# Async engine (psycopg or asyncpg) -from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker - -async_engine = create_async_engine( - "postgresql+asyncpg://app:secret@localhost/mydb", - # or "postgresql+psycopg://..." 
for psycopg async - pool_size=20, -) - -AsyncSession = async_sessionmaker(async_engine, expire_on_commit=False) - -async with AsyncSession() as session: - result = await session.execute(text("SELECT * FROM users LIMIT 10")) - rows = result.all() -``` - -## Node.js: node-postgres (pg) - -```javascript -import pg from "pg"; - -// Connection pool (always use pools in production) -const pool = new pg.Pool({ - connectionString: "postgresql://app:secret@localhost/mydb", - max: 20, - idleTimeoutMillis: 30000, - connectionTimeoutMillis: 5000, -}); - -// Parameterized query (prevents SQL injection) -const { rows } = await pool.query( - "SELECT * FROM users WHERE id = $1 AND active = $2", - [42, true] -); - -// Transaction -const client = await pool.connect(); -try { - await client.query("BEGIN"); - await client.query("UPDATE accounts SET balance = balance - $1 WHERE id = $2", [100, 1]); - await client.query("UPDATE accounts SET balance = balance + $1 WHERE id = $2", [100, 2]); - await client.query("COMMIT"); -} catch (e) { - await client.query("ROLLBACK"); - throw e; -} finally { - client.release(); -} - -// Cursor for large result sets -const cursor = client.query(new pg.Cursor("SELECT * FROM large_table")); -let rows = await cursor.read(100); // read 100 rows at a time -while (rows.length > 0) { - // process rows - rows = await cursor.read(100); -} - -// LISTEN/NOTIFY -const listener = await pool.connect(); -listener.on("notification", (msg) => { - console.log(msg.channel, msg.payload); -}); -await listener.query("LISTEN order_updates"); - -// From another connection: -// NOTIFY order_updates, '{"order_id": 123}'; -// Or: SELECT pg_notify('order_updates', '{"order_id": 123}'); -``` - -## Rust - -### sqlx (Compile-Time Checked Queries) - -```rust -use sqlx::postgres::PgPoolOptions; - -#[tokio::main] -async fn main() -> Result<(), sqlx::Error> { - let pool = PgPoolOptions::new() - .max_connections(20) - .connect("postgresql://app:secret@localhost/mydb") - .await?; - - // 
Compile-time checked query (requires DATABASE_URL at build time) - let user = sqlx::query_as!( - User, - "SELECT id, name, email FROM users WHERE id = $1", - 42_i64 - ) - .fetch_one(&pool) - .await?; - - // Dynamic query - let rows = sqlx::query("SELECT * FROM users WHERE active = $1") - .bind(true) - .fetch_all(&pool) - .await?; - - Ok(()) -} -``` - -### tokio-postgres + deadpool - -```rust -use deadpool_postgres::{Config, Runtime}; -use tokio_postgres::NoTls; - -let mut cfg = Config::new(); -cfg.host = Some("localhost".into()); -cfg.dbname = Some("mydb".into()); -cfg.user = Some("app".into()); - -let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?; -let client = pool.get().await?; - -let rows = client - .query("SELECT id, name FROM users WHERE active = $1", &[&true]) - .await?; -``` - -## Connection Pool Sizing - -```text -# Rule of thumb for pool size: -# pool_size = (core_count * 2) + effective_spindle_count -# For SSD: pool_size ~ core_count * 2 + 1 -# Example: 8 cores, SSD -> ~17 connections - -# PgBouncer sitting between app and PG: -# App pool size: 50-100 (cheap, just PgBouncer connections) -# PgBouncer default_pool_size: 20-30 (actual PG connections) -# max_connections in PG: 100-300 - -# Key principle: fewer PG connections = better throughput -# due to reduced context switching and lock contention -``` diff --git a/plugins/flow/skills/postgres/references/extensions.md b/plugins/flow/skills/postgres/references/extensions.md deleted file mode 100644 index 34b730b..0000000 --- a/plugins/flow/skills/postgres/references/extensions.md +++ /dev/null @@ -1,279 +0,0 @@ -# Key Extensions - -## Installing and Managing Extensions - -```sql --- List available extensions -SELECT * FROM pg_available_extensions ORDER BY name; - --- Install -CREATE EXTENSION IF NOT EXISTS pg_trgm; - --- Check installed extensions -SELECT extname, extversion FROM pg_extension; - --- Upgrade -ALTER EXTENSION pg_trgm UPDATE TO '1.6'; - --- Some extensions require 
shared_preload_libraries (restart needed) --- postgresql.conf: --- shared_preload_libraries = 'pg_stat_statements, pgaudit, pg_cron' -``` - -## PostGIS (Geospatial) - -```sql -CREATE EXTENSION postgis; - --- Geometry types -CREATE TABLE places ( - id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - name text NOT NULL, - location geometry(Point, 4326), -- WGS 84 (GPS coordinates) - boundary geometry(Polygon, 4326) -); - --- Insert a point (longitude, latitude) -INSERT INTO places (name, location) -VALUES ('Office', ST_SetSRID(ST_MakePoint(-73.9857, 40.7484), 4326)); - --- Spatial index (required for performance) -CREATE INDEX idx_places_location ON places USING gist (location); - --- Find places within 5km of a point -SELECT name, ST_Distance( - location::geography, - ST_SetSRID(ST_MakePoint(-73.9857, 40.7484), 4326)::geography -) AS distance_meters -FROM places -WHERE ST_DWithin( - location::geography, - ST_SetSRID(ST_MakePoint(-73.9857, 40.7484), 4326)::geography, - 5000 -- meters -) -ORDER BY distance_meters; - --- Point in polygon -SELECT p.name -FROM places p -WHERE ST_Contains(p.boundary, ST_SetSRID(ST_MakePoint(-73.98, 40.75), 4326)); - --- Nearest neighbor (KNN) -SELECT name, location <-> ST_SetSRID(ST_MakePoint(-73.98, 40.75), 4326) AS dist -FROM places -ORDER BY location <-> ST_SetSRID(ST_MakePoint(-73.98, 40.75), 4326) -LIMIT 5; -``` - -## pgvector (Vector Similarity Search) - -```sql -CREATE EXTENSION vector; - --- Vector column -CREATE TABLE embeddings ( - id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - content text, - embedding vector(1536) -- OpenAI ada-002 dimension -); - --- Insert embeddings -INSERT INTO embeddings (content, embedding) -VALUES ('PostgreSQL is great', '[0.1, 0.2, ...]'::vector); - --- Similarity search (cosine distance) -SELECT id, content, - 1 - (embedding <=> query_embedding) AS similarity -FROM embeddings -ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector -LIMIT 10; - --- Distance operators: --- <-> L2 (Euclidean) distance 
--- <=> cosine distance --- <#> negative inner product - --- HNSW index (recommended, PG16+/pgvector 0.5+) -CREATE INDEX idx_embeddings_hnsw ON embeddings - USING hnsw (embedding vector_cosine_ops) - WITH (m = 16, ef_construction = 64); --- Set ef_search at query time: -SET hnsw.ef_search = 100; - --- IVFFlat index (older, still useful for very large datasets) -CREATE INDEX idx_embeddings_ivf ON embeddings - USING ivfflat (embedding vector_cosine_ops) - WITH (lists = 100); --- Set probes at query time: -SET ivfflat.probes = 10; - --- Embedding workflow: store, index, query --- 1. Generate embeddings in app code (OpenAI, Cohere, etc.) --- 2. INSERT into table with vector column --- 3. Create HNSW index --- 4. Query with ORDER BY <=> LIMIT N -``` - -## pg_cron (Job Scheduling) - -```sql --- shared_preload_libraries = 'pg_cron' (requires restart) -CREATE EXTENSION pg_cron; - --- Schedule a job (cron syntax) -SELECT cron.schedule( - 'nightly-cleanup', -- job name - '0 3 * * *', -- 3 AM daily - $$DELETE FROM logs WHERE created_at < now() - interval '90 days'$$ -); - --- Schedule with specific database -SELECT cron.schedule_in_database( - 'vacuum-analytics', '0 4 * * 0', -- Sundays at 4 AM - 'VACUUM ANALYZE analytics.events', - 'analytics_db' -); - --- List scheduled jobs -SELECT * FROM cron.job; - --- View job run history -SELECT * FROM cron.job_run_details ORDER BY start_time DESC LIMIT 20; - --- Unschedule -SELECT cron.unschedule('nightly-cleanup'); -``` - -## pg_stat_statements (Query Performance) - -```sql --- shared_preload_libraries = 'pg_stat_statements' -CREATE EXTENSION pg_stat_statements; - --- Top queries by total time -SELECT calls, total_exec_time::int AS total_ms, - mean_exec_time::int AS mean_ms, query -FROM pg_stat_statements -ORDER BY total_exec_time DESC LIMIT 10; - --- See pg performance.md for detailed usage -``` - -## pg_trgm (Trigram Similarity / Fuzzy Search) - -```sql -CREATE EXTENSION pg_trgm; - --- Similarity score (0 to 1) -SELECT 
similarity('PostgreSQL', 'Postgre') AS sim; --- 0.5384... - --- Fuzzy search with threshold -SELECT name, similarity(name, 'Postgres') AS sim -FROM products -WHERE similarity(name, 'Postgres') > 0.3 -ORDER BY sim DESC; - --- LIKE/ILIKE acceleration with GIN index -CREATE INDEX idx_products_name_trgm ON products USING gin (name gin_trgm_ops); --- Now LIKE '%ostgre%' uses the index (not just prefix matches) - -SELECT * FROM products WHERE name ILIKE '%database%'; -- uses GIN trgm index - --- GiST index (supports <-> distance operator for KNN) -CREATE INDEX idx_products_name_gist ON products USING gist (name gist_trgm_ops); - --- Nearest match -SELECT name, name <-> 'Postgrse' AS distance -FROM products -ORDER BY name <-> 'Postgrse' -LIMIT 5; -``` - -## ltree (Hierarchical Data) - -```sql -CREATE EXTENSION ltree; - -CREATE TABLE categories ( - id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - name text NOT NULL, - path ltree NOT NULL -); - -CREATE INDEX idx_categories_path ON categories USING gist (path); - --- Insert hierarchical data -INSERT INTO categories (name, path) VALUES - ('Electronics', 'electronics'), - ('Phones', 'electronics.phones'), - ('Android', 'electronics.phones.android'), - ('Laptops', 'electronics.laptops'); - --- Descendant query (electronics itself and everything beneath it) -SELECT * FROM categories WHERE path <@ 'electronics'; - --- Ancestor query (all ancestors of android, including android itself) -SELECT * FROM categories WHERE path @> 'electronics.phones.android'; - --- Direct children -SELECT * FROM categories WHERE path ~ 'electronics.*{1}'; - --- Path operations -SELECT nlevel(path), subpath(path, 0, 2) FROM categories; -``` - -## citext (Case-Insensitive Text) - -```sql -CREATE EXTENSION citext; - -CREATE TABLE users ( - id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - email citext UNIQUE NOT NULL -); - --- Comparisons are case-insensitive automatically -INSERT INTO users (email) VALUES ('User@Example.COM'); -SELECT * FROM users WHERE email = 'user@example.com'; -- 
matches --- No need for lower() in queries or indexes -``` - -## TimescaleDB (Time-Series) - -```sql --- Requires separate installation (not in core contrib) -CREATE EXTENSION timescaledb; - --- Convert regular table to hypertable -CREATE TABLE metrics ( - time timestamptz NOT NULL, - device_id bigint NOT NULL, - temperature double precision, - humidity double precision -); - -SELECT create_hypertable('metrics', 'time'); - --- Automatic time-based partitioning behind the scenes --- Query as normal table -SELECT time_bucket('1 hour', time) AS bucket, - device_id, - avg(temperature) AS avg_temp -FROM metrics -WHERE time > now() - interval '7 days' -GROUP BY bucket, device_id -ORDER BY bucket DESC; - --- Continuous aggregates (materialized views that auto-refresh) -CREATE MATERIALIZED VIEW hourly_metrics -WITH (timescaledb.continuous) AS -SELECT time_bucket('1 hour', time) AS bucket, - device_id, - avg(temperature), min(temperature), max(temperature) -FROM metrics -GROUP BY bucket, device_id; - --- Retention policies -SELECT add_retention_policy('metrics', interval '90 days'); -``` diff --git a/plugins/flow/skills/postgres/references/indexing.md b/plugins/flow/skills/postgres/references/indexing.md deleted file mode 100644 index 74fd096..0000000 --- a/plugins/flow/skills/postgres/references/indexing.md +++ /dev/null @@ -1,142 +0,0 @@ -# Indexing & Performance - -## Index Types - -### B-tree (default) - -Best for equality and range queries on scalar types. - -```sql -CREATE INDEX idx_users_email ON users (email); -CREATE INDEX idx_orders_date ON orders (created_at DESC); - --- Composite index (leftmost prefix rule applies) -CREATE INDEX idx_orders_user_date ON orders (user_id, created_at DESC); -``` - -### GIN (Generalized Inverted Index) - -Best for JSONB, arrays, full-text search, and trigram similarity. 
- -```sql --- JSONB containment queries -CREATE INDEX idx_docs_data ON documents USING gin (data); --- Supports: data @> '{"key": "value"}' - --- JSONB path operations (more selective) -CREATE INDEX idx_docs_data_path ON documents USING gin (data jsonb_path_ops); - --- Array containment -CREATE INDEX idx_posts_tags ON posts USING gin (tags); - --- Full-text search -CREATE INDEX idx_articles_fts ON articles USING gin (to_tsvector('english', title || ' ' || body)); - --- Trigram similarity (requires pg_trgm) -CREATE INDEX idx_users_name_trgm ON users USING gin (name gin_trgm_ops); -``` - -### GiST (Generalized Search Tree) - -Best for geometric, range, and nearest-neighbor queries. - -```sql --- Range types -CREATE INDEX idx_events_during ON events USING gist (during); --- Supports: during && '[2024-01-01, 2024-02-01)'::tsrange - --- PostGIS geometry -CREATE INDEX idx_places_geom ON places USING gist (geom); - --- Exclusion constraints -ALTER TABLE reservations ADD CONSTRAINT no_overlap - EXCLUDE USING gist (room WITH =, during WITH &&); -``` - -### BRIN (Block Range Index) - -Best for large, naturally ordered tables (time-series, append-only). - -```sql --- Very small index for large time-series tables -CREATE INDEX idx_logs_time ON logs USING brin (created_at) - WITH (pages_per_range = 32); -``` - -## Partial Indexes - -Only index rows matching a condition. Smaller, faster. - -```sql -CREATE INDEX idx_orders_pending ON orders (created_at) - WHERE status = 'pending'; - -CREATE INDEX idx_users_active_email ON users (email) - WHERE deleted_at IS NULL; -``` - -## Expression Indexes - -Index on computed expressions. - -```sql -CREATE INDEX idx_users_lower_email ON users (lower(email)); --- Query must match: WHERE lower(email) = 'user@example.com' - -CREATE INDEX idx_events_year ON events (EXTRACT(YEAR FROM created_at)); -``` - -## Covering Indexes (INCLUDE) - -Include non-key columns in the index for index-only scans. 
- -```sql -CREATE INDEX idx_orders_user_covering - ON orders (user_id) - INCLUDE (total, status); --- Avoids heap fetches for: SELECT total, status FROM orders WHERE user_id = 1; -``` - -## EXPLAIN ANALYZE - -```sql --- Always use ANALYZE to get actual timings -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...; - --- Key things to look for: --- - Seq Scan on large tables (missing index?) --- - Nested Loop with high row counts (consider hash/merge join) --- - Sort with "Sort Method: external merge" (increase work_mem) --- - Buffers: shared hit vs shared read (cache effectiveness) -``` - -## Index Maintenance - -```sql --- Check index usage -SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read -FROM pg_stat_user_indexes -ORDER BY idx_scan ASC; - --- Find unused indexes -SELECT indexrelid::regclass AS index, relid::regclass AS table, idx_scan -FROM pg_stat_user_indexes -WHERE idx_scan = 0 - AND schemaname = 'public'; - --- Rebuild bloated indexes -REINDEX INDEX CONCURRENTLY idx_users_email; -``` - -## Statistics & Tuning - -```sql --- Increase statistics target for better query plans -ALTER TABLE orders ALTER COLUMN status SET STATISTICS 1000; -ANALYZE orders; - --- Check table statistics -SELECT attname, n_distinct, most_common_vals, correlation -FROM pg_stats -WHERE tablename = 'orders'; -``` diff --git a/plugins/flow/skills/postgres/references/json.md b/plugins/flow/skills/postgres/references/json.md deleted file mode 100644 index be6ed73..0000000 --- a/plugins/flow/skills/postgres/references/json.md +++ /dev/null @@ -1,246 +0,0 @@ -# JSON/JSONB Patterns - -## JSONB vs JSON - -| Feature | `jsonb` | `json` | -|---------|---------|--------| -| Storage | Decomposed binary | Raw text | -| Duplicate keys | Last value wins | All kept | -| Key ordering | Not preserved | Preserved | -| Indexing (GIN) | Yes | No | -| Operators (@>, ?, etc.) 
| Yes | No | -| Speed (read) | Faster | Slower (re-parsed each time) | -| Speed (write) | Slightly slower (conversion) | Faster | - -**Use JSONB almost always.** Use JSON only when you need exact text preservation or duplicate keys. - -## Operators - -```sql --- Navigation operators -SELECT data->'address' -- jsonb: returns JSONB object -FROM documents; - -SELECT data->>'name' -- text: returns text value -FROM documents; - -SELECT data#>'{address,city}' -- jsonb: path navigation, returns JSONB -FROM documents; - -SELECT data#>>'{tags,0}' -- text: path navigation, returns text -FROM documents; - --- Containment -SELECT * FROM docs WHERE data @> '{"status": "active"}'; -- left contains right -SELECT * FROM docs WHERE '{"status": "active"}' <@ data; -- right contains left - --- Existence -SELECT * FROM docs WHERE data ? 'email'; -- key exists -SELECT * FROM docs WHERE data ?| ARRAY['email', 'phone']; -- any key exists -SELECT * FROM docs WHERE data ?& ARRAY['email', 'phone']; -- all keys exist - --- Equality -SELECT * FROM docs WHERE data->'address' = '{"city": "NYC"}'::jsonb; -``` - -## Building JSON/JSONB - -```sql --- Build objects -SELECT jsonb_build_object( - 'id', u.id, - 'name', u.name, - 'email', u.email, - 'address', jsonb_build_object('city', a.city, 'state', a.state) -) -FROM users u JOIN addresses a ON a.user_id = u.id; - --- Build arrays -SELECT jsonb_build_array(1, 'two', true, null); --- [1, "two", true, null] - --- Aggregate rows into a JSON array -SELECT jsonb_agg( - jsonb_build_object('id', id, 'name', name) - ORDER BY name -) -FROM users WHERE active; --- [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}] - --- Aggregate key-value pairs into an object -SELECT jsonb_object_agg(key, value) -FROM settings; --- {"theme": "dark", "lang": "en"} - --- From row to JSON -SELECT to_jsonb(u.*) FROM users u WHERE id = 1; --- {"id": 1, "name": "Alice", "email": "alice@example.com", ...} -``` - -## SQL/JSON Path Language (PG12+) - -```sql --- 
jsonb_path_query: extract matching values -SELECT jsonb_path_query(data, '$.tags[*] ? (@ starts with "pg")') -FROM documents; - --- jsonb_path_exists: check if path matches -SELECT * FROM documents -WHERE jsonb_path_exists(data, '$.tags[*] ? (@ == "postgres")'); - --- jsonb_path_query_array: return matches as array -SELECT jsonb_path_query_array(data, '$.items[*].price ? (@ > 100)') -FROM orders; - --- jsonb_path_query_first: first match only -SELECT jsonb_path_query_first(data, '$.items[0].name') -FROM orders; - --- Path expressions with filters --- $ root --- .key object member --- [*] all array elements --- [0] array index --- ?() filter expression --- @ current item in filter - --- Examples: --- '$.address.city' -> navigate to city --- '$.items[*].price ? (@ > 50)' -> prices over 50 --- '$.users[*] ? (@.age >= 18)' -> users 18+ --- '$.items[*] ? (@.qty > 0).name' -> names where qty > 0 -``` - -## GIN Indexing for JSONB - -```sql --- Default GIN index: supports @>, ?, ?|, ?& -CREATE INDEX idx_docs_data ON documents USING gin (data); - --- jsonb_path_ops: supports only @>, but smaller and faster -CREATE INDEX idx_docs_data_pathops ON documents USING gin (data jsonb_path_ops); - --- Index a specific key (for targeted queries) -CREATE INDEX idx_docs_status ON documents USING btree ((data->>'status')); -CREATE INDEX idx_docs_score ON documents USING btree (((data->>'score')::int)); - --- GIN on a nested path -CREATE INDEX idx_docs_tags ON documents USING gin ((data->'tags')); --- Supports: WHERE data->'tags' @> '["postgres"]' - --- jsonb_path_ops is preferred when you only use @> containment queries --- Default ops is needed for ?, ?|, ?& existence checks -``` - -## Modifying JSONB - -```sql --- Concatenation (merge / overwrite) -UPDATE docs SET data = data || '{"status": "archived", "priority": 1}'::jsonb -WHERE id = 1; - --- Remove a key -UPDATE docs SET data = data - 'temp_field' WHERE id = 1; - --- Remove nested key by path -UPDATE docs SET data = data #- 
'{address,zip}' WHERE id = 1; - --- Remove array element by index -UPDATE docs SET data = data - 0 WHERE id = 1; -- remove first element (if array) - --- jsonb_set: set a value at a path -UPDATE docs SET data = jsonb_set(data, '{address,city}', '"Boston"') -WHERE id = 1; - --- jsonb_set with create_if_missing (default true) -UPDATE docs SET data = jsonb_set(data, '{new_field}', '"new_value"', true) -WHERE id = 1; - --- jsonb_insert: insert into arrays -UPDATE docs SET data = jsonb_insert(data, '{tags,0}', '"urgent"') -WHERE id = 1; --- Inserts "urgent" at position 0 of tags array - --- Deep merge (recursive, custom function often needed) --- Simple top-level merge uses || -UPDATE docs SET data = data || '{"score": 5}'::jsonb; -``` - -## Querying Nested Structures Efficiently - -```sql --- Expand JSONB array into rows -SELECT d.id, tag.value AS tag -FROM documents d, - jsonb_array_elements_text(d.data->'tags') AS tag(value); - --- Expand JSONB object into key-value rows -SELECT d.id, kv.key, kv.value -FROM documents d, - jsonb_each_text(d.data->'metadata') AS kv(key, value); - --- Nested array of objects -SELECT - o.id, - item->>'name' AS item_name, - (item->>'price')::numeric AS price, - (item->>'qty')::int AS qty -FROM orders o, - jsonb_array_elements(o.data->'items') AS item; - --- Aggregate back after expansion -SELECT o.id, - sum((item->>'price')::numeric * (item->>'qty')::int) AS total -FROM orders o, - jsonb_array_elements(o.data->'items') AS item -GROUP BY o.id; - --- EXISTS subquery pattern (often faster than @>) -SELECT * FROM orders -WHERE EXISTS ( - SELECT 1 - FROM jsonb_array_elements(data->'items') AS item - WHERE (item->>'price')::numeric > 100 -); -``` - -## Generated Columns from JSONB (PG12+) - -```sql --- Extract frequently-queried fields into generated columns -ALTER TABLE documents - ADD COLUMN status text GENERATED ALWAYS AS (data->>'status') STORED; - -ALTER TABLE documents - ADD COLUMN score int GENERATED ALWAYS AS ((data->>'score')::int) 
STORED; - --- Now you can index the generated column directly -CREATE INDEX idx_docs_status ON documents (status); -CREATE INDEX idx_docs_score ON documents (score); - --- Queries can use either the generated column or the JSONB path -SELECT * FROM documents WHERE status = 'active'; --- Uses the B-tree index on the generated column -``` - -## Common Patterns - -```sql --- COALESCE with JSONB (handle missing keys) -SELECT COALESCE(data->>'nickname', data->>'name', 'Anonymous') AS display_name -FROM users; - --- Check for null vs missing key -SELECT * -FROM documents -WHERE data->'field' IS NOT NULL -- key exists (value might be JSON null) - AND data->>'field' IS NOT NULL; -- key exists AND value is not JSON null - --- Type checking -SELECT * -FROM documents -WHERE jsonb_typeof(data->'tags') = 'array'; - --- Pretty print -SELECT jsonb_pretty(data) FROM documents WHERE id = 1; -``` diff --git a/plugins/flow/skills/postgres/references/migrations.md b/plugins/flow/skills/postgres/references/migrations.md deleted file mode 100644 index 8a726f5..0000000 --- a/plugins/flow/skills/postgres/references/migrations.md +++ /dev/null @@ -1,364 +0,0 @@ -# Schema Migrations & DevOps - -## Alembic (SQLAlchemy) - -### Setup - -```bash -pip install alembic -alembic init migrations -``` - -```python -# migrations/env.py (key patterns) -from sqlalchemy import engine_from_config -from myapp.models import Base # your declarative base - -target_metadata = Base.metadata - -def run_migrations_online(): - connectable = engine_from_config(config.get_section("alembic"), prefix="sqlalchemy.") - with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) - with context.begin_transaction(): - context.run_migrations() -``` - -### Common Commands - -```bash -# Auto-generate migration from model changes -alembic revision --autogenerate -m "add users table" - -# Create empty migration -alembic revision -m "add custom index" - -# Apply all 
migrations -alembic upgrade head - -# Rollback one step -alembic downgrade -1 - -# Show current revision -alembic current - -# Show migration history -alembic history --verbose - -# Upgrade to specific revision -alembic upgrade abc123 - -# Stamp current DB as specific revision (skip running migrations) -alembic stamp head -``` - -### Migration File Patterns - -```python -"""add users table""" -revision = 'abc123' -down_revision = 'def456' - -from alembic import op -import sqlalchemy as sa - -def upgrade(): - op.create_table( - 'users', - sa.Column('id', sa.BigInteger, primary_key=True), - sa.Column('email', sa.Text, nullable=False), - sa.Column('name', sa.Text), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()), - ) - op.create_index('idx_users_email', 'users', ['email'], unique=True) - -def downgrade(): - op.drop_table('users') -``` - -### Branching and Merging - -```bash -# Create branch from specific revision -alembic revision --head abc123 -m "branch feature" - -# Merge two heads -alembic merge -m "merge branches" abc123 def456 - -# Show multiple heads -alembic heads -``` - -## Flyway - -### Naming Conventions - -```text -sql/ - V1__create_users.sql # versioned (V{version}__{description}.sql) - V2__add_orders.sql - V2.1__add_order_status.sql - R__refresh_views.sql # repeatable (R__{description}.sql, re-run when changed) - U1__undo_users.sql # undo (U{version}__{description}.sql, Flyway Teams) -``` - -### Commands - -```bash -flyway -url=jdbc:postgresql://localhost/mydb -user=app migrate -flyway info # show migration status -flyway validate # check applied vs available -flyway repair # fix metadata table -flyway baseline # baseline existing database -flyway clean # drop all objects (DANGER) -``` - -### Callbacks - -```text -sql/ - beforeMigrate.sql # runs once, before the migrate run starts - afterMigrate.sql # runs after all migrations - beforeEachMigrate.sql # before each migration file - afterEachMigrate.sql # after each migration file 
-``` - -```sql --- afterMigrate.sql (example: refresh materialized views) -REFRESH MATERIALIZED VIEW CONCURRENTLY mv_dashboard_stats; -``` - -### Baseline (Existing Database) - -```bash -# Mark existing database as version 1 (skip V1 migration) -flyway -baselineVersion=1 baseline - -# Future migrations (V2+) will run normally -flyway migrate -``` - -## Zero-Downtime Migrations - -### CREATE INDEX CONCURRENTLY - -```sql --- Regular CREATE INDEX locks writes for entire duration --- CONCURRENTLY allows reads AND writes during index build -CREATE INDEX CONCURRENTLY idx_orders_status ON orders (status); - --- Caveats: --- 1. Cannot run inside a transaction block --- 2. Takes longer (two table scans) --- 3. If it fails, leaves an INVALID index; clean up with: -DROP INDEX IF EXISTS idx_orders_status; - --- Check for invalid indexes -SELECT indexrelid::regclass, indisvalid -FROM pg_index -WHERE NOT indisvalid; -``` - -### ADD COLUMN with Defaults (PG11+) - -```sql --- PG11+: ADD COLUMN with DEFAULT is instant (no table rewrite!) -ALTER TABLE orders ADD COLUMN priority integer DEFAULT 0; --- This is safe and instant even on large tables - --- Before PG11, this rewrote the entire table --- Workaround (pre-PG11): -ALTER TABLE orders ADD COLUMN priority integer; --- Then backfill in batches: -UPDATE orders SET priority = 0 WHERE id BETWEEN 1 AND 10000; -UPDATE orders SET priority = 0 WHERE id BETWEEN 10001 AND 20000; --- ... 
-ALTER TABLE orders ALTER COLUMN priority SET DEFAULT 0; -ALTER TABLE orders ALTER COLUMN priority SET NOT NULL; -- only after backfill -``` - -### NOT VALID Constraints - -```sql --- Add constraint without checking existing rows (instant) -ALTER TABLE orders ADD CONSTRAINT chk_positive_total - CHECK (total >= 0) NOT VALID; - --- New inserts/updates are validated immediately --- Validate existing rows later (takes ShareUpdateExclusiveLock, not AccessExclusiveLock) -ALTER TABLE orders VALIDATE CONSTRAINT chk_positive_total; - --- Same pattern for foreign keys -ALTER TABLE orders ADD CONSTRAINT fk_orders_user - FOREIGN KEY (user_id) REFERENCES users (id) NOT VALID; --- Later: -ALTER TABLE orders VALIDATE CONSTRAINT fk_orders_user; -``` - -### Safe Column Type Changes - -```sql --- These are instant (no rewrite): --- varchar(N) -> varchar(M) where M > N --- varchar(N) -> text --- numeric(P,S) -> numeric (remove constraint) - --- These REWRITE the table (acquire AccessExclusiveLock): --- integer -> bigint --- text -> integer --- Change of numeric precision - --- Safe alternative for type changes: --- 1. Add new column -ALTER TABLE orders ADD COLUMN amount_v2 bigint; --- 2. Backfill (in batches) -UPDATE orders SET amount_v2 = amount WHERE id BETWEEN 1 AND 10000; --- 3. Add NOT NULL after backfill -ALTER TABLE orders ALTER COLUMN amount_v2 SET NOT NULL; --- 4. Swap with rename -ALTER TABLE orders RENAME COLUMN amount TO amount_old; -ALTER TABLE orders RENAME COLUMN amount_v2 TO amount; --- 5. 
Drop old column later -ALTER TABLE orders DROP COLUMN amount_old; -``` - -### Safe Enum Changes - -```sql --- Adding values is safe (PG10+) -ALTER TYPE order_status ADD VALUE 'refunded'; --- Before PG12 this cannot run inside a transaction block; IF NOT EXISTS makes the statement idempotent -ALTER TYPE order_status ADD VALUE IF NOT EXISTS 'refunded'; - --- Removing values requires creating a new type and migrating; renaming uses ALTER TYPE ... RENAME VALUE (PG10+) -``` - -### Lock Timeout for Safety - -```sql --- Prevent long locks during migrations -SET lock_timeout = '5s'; --- If the lock cannot be acquired in 5s, the statement fails instead of blocking -ALTER TABLE orders ADD COLUMN priority integer DEFAULT 0; --- Retry if it fails - --- Per-statement in migration scripts -SET lock_timeout = '5s'; -SET statement_timeout = '60s'; -``` - -## pgTAP (Testing Framework) - -```sql --- Install -CREATE EXTENSION pgtap; - --- Basic test structure -BEGIN; -SELECT plan(7); - --- Table existence -SELECT has_table('users'); -SELECT has_column('users', 'email'); -SELECT col_type_is('users', 'email', 'text'); -SELECT col_not_null('users', 'email'); - --- Index existence -SELECT has_index('users', 'idx_users_email'); - --- Custom assertions -SELECT results_eq( - 'SELECT count(*)::int FROM users WHERE active', - ARRAY[42], - 'Should have 42 active users' -); - --- Function testing -SELECT is( - get_user_balance(1), - 100.00::numeric, - 'User 1 balance should be 100' -); - -SELECT finish(); -ROLLBACK; -- clean up test data -``` - -```bash -# Run pgTAP tests with pg_prove -pg_prove -d mydb -v tests/*.sql - -# Or run directly -psql -d mydb -f tests/test_users.sql -``` - -## pg_dump / pg_restore - -### Common Patterns - -```bash -# Full database backup (custom format; -j parallel dump is only valid with the directory format, -Fd) -pg_dump -Fc -d mydb -f mydb.dump - -# Schema only (for diffing or creating empty DBs) -pg_dump -Fc --schema-only -d mydb -f mydb_schema.dump - -# Data only -pg_dump -Fc --data-only -d mydb -f mydb_data.dump - -# Specific tables -pg_dump -Fc -t users -t orders -d mydb -f 
subset.dump - -# Specific schema -pg_dump -Fc -n public -d mydb -f public_schema.dump - -# Exclude tables -pg_dump -Fc -T audit_log -T temp_* -d mydb -f mydb_no_audit.dump -``` - -### Restore - -```bash -# Restore full backup (parallel) -pg_restore -j4 -d mydb_new mydb.dump - -# Restore specific tables -pg_restore -t users -t orders -d mydb_new mydb.dump - -# Schema only restore -pg_restore --schema-only -d mydb_new mydb.dump - -# Data only restore -pg_restore --data-only -d mydb_new mydb.dump - -# List contents of dump -pg_restore -l mydb.dump - -# Clean (drop objects before creating) -pg_restore --clean --if-exists -d mydb mydb.dump - -# Use a list file to selectively restore -pg_restore -l mydb.dump > restore.list -# Edit restore.list to comment out unwanted items -pg_restore -L restore.list -d mydb_new mydb.dump -``` - -### Directory Format (Best for Large DBs) - -```bash -# Dump to directory (enables parallel dump) -pg_dump -Fd -j8 -d mydb -f /backup/mydb_dir/ - -# Restore from directory (parallel) -pg_restore -Fd -j8 -d mydb_new /backup/mydb_dir/ -``` - -### Plain SQL Output - -```bash -# Plain text SQL (useful for inspection, version control) -pg_dump -Fp -d mydb -f mydb.sql - -# Restore plain SQL -psql -d mydb_new -f mydb.sql -v ON_ERROR_STOP=1 -``` diff --git a/plugins/flow/skills/postgres/references/performance.md b/plugins/flow/skills/postgres/references/performance.md deleted file mode 100644 index 803dd8c..0000000 --- a/plugins/flow/skills/postgres/references/performance.md +++ /dev/null @@ -1,290 +0,0 @@ -# Performance Tuning - -## EXPLAIN: Reading Execution Plans - -```sql --- Full diagnostic output -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...; - --- JSON format (useful for tools like explain.depesz.com) -EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) SELECT ...; - --- Without actually executing (safe for mutating queries) -EXPLAIN (COSTS, VERBOSE) DELETE FROM orders WHERE created_at < '2020-01-01'; -``` - -### Key Things to Look For - -| Symptom | 
Likely Cause | Fix | -|---------|-------------|-----| -| `Seq Scan` on large table | Missing or unused index | Create index, check predicate match | -| `Sort Method: external merge Disk` | `work_mem` too low | Increase `work_mem` | -| High `actual loops` on Nested Loop | Large outer set | Consider hash/merge join, or LIMIT | -| `Buffers: shared read` >> `shared hit` | Cold cache or undersized `shared_buffers` | Increase `shared_buffers`, warm cache | -| `Rows Removed by Filter` very high | Index not selective enough | Refine index, add partial index | -| `HashAgg Batches > 0` | `work_mem` too low for hash agg | Increase `work_mem` | - -## pg_stat_statements - -```sql --- Enable (requires restart first time) --- postgresql.conf: shared_preload_libraries = 'pg_stat_statements' -CREATE EXTENSION IF NOT EXISTS pg_stat_statements; - --- Top 10 queries by total time -SELECT - calls, - round(total_exec_time::numeric, 1) AS total_ms, - round(mean_exec_time::numeric, 1) AS mean_ms, - round((stddev_exec_time)::numeric, 1) AS stddev_ms, - rows, - query -FROM pg_stat_statements -ORDER BY total_exec_time DESC -LIMIT 10; - --- Queries with worst hit ratio (most disk reads) -SELECT - query, - calls, - shared_blks_hit, - shared_blks_read, - round(100.0 * shared_blks_hit / NULLIF(shared_blks_hit + shared_blks_read, 0), 1) AS hit_pct -FROM pg_stat_statements -ORDER BY shared_blks_read DESC -LIMIT 10; - --- Reset statistics -SELECT pg_stat_statements_reset(); -``` - -## Index Tuning - -### Partial Indexes - -```sql --- Only index what queries actually filter on -CREATE INDEX idx_orders_pending ON orders (created_at) - WHERE status = 'pending'; --- Much smaller than indexing all rows; queries MUST include WHERE status = 'pending' -``` - -### Covering Indexes (INCLUDE, PG11+) - -```sql --- Enables index-only scans by including non-key columns -CREATE INDEX idx_orders_user_covering - ON orders (user_id) - INCLUDE (total, status, created_at); - --- Verify index-only scan in EXPLAIN: --- 
"Index Only Scan using idx_orders_user_covering" -``` - -### Expression Indexes - -```sql -CREATE INDEX idx_users_lower_email ON users (lower(email)); --- Query MUST use lower(email) = '...' to use this index - -CREATE INDEX idx_events_date ON events (date(created_at)); -CREATE INDEX idx_data_name ON documents ((data->>'name')); -``` - -### Multicolumn Index Ordering - -```sql --- Leftmost prefix rule: index on (a, b, c) supports queries on (a), (a,b), (a,b,c) --- but NOT (b), (c), or (b,c) alone -CREATE INDEX idx_orders_multi ON orders (user_id, status, created_at DESC); -``` - -### Index-Only Scans - -```sql --- Check if visibility map is up to date (needed for index-only scans) -SELECT relname, - n_tup_mod, - last_vacuum, - last_autovacuum -FROM pg_stat_user_tables -WHERE relname = 'orders'; --- If heap fetches are high in EXPLAIN, run VACUUM to update visibility map -``` - -## Table Statistics - -```sql --- Force statistics refresh -ANALYZE orders; -ANALYZE; -- all tables - --- Increase statistics target for skewed columns -ALTER TABLE orders ALTER COLUMN status SET STATISTICS 1000; -- default 100 -ANALYZE orders; - --- Check statistics -SELECT attname, n_distinct, most_common_vals, most_common_freqs, correlation -FROM pg_stats -WHERE tablename = 'orders' AND attname = 'status'; - --- Extended statistics for correlated columns (PG10+) -CREATE STATISTICS stat_orders_region_product (dependencies, ndistinct, mcv) - ON region, product FROM orders; -ANALYZE orders; -``` - -## Autovacuum Tuning - -```sql --- Monitor autovacuum activity -SELECT relname, - n_live_tup, - n_dead_tup, - round(100.0 * n_dead_tup / NULLIF(n_live_tup + n_dead_tup, 0), 1) AS dead_pct, - last_autovacuum, - last_autoanalyze, - autovacuum_count, - autoanalyze_count -FROM pg_stat_user_tables -ORDER BY n_dead_tup DESC; - --- Per-table autovacuum settings for hot tables -ALTER TABLE hot_table SET ( - autovacuum_vacuum_scale_factor = 0.01, -- trigger at 1% dead (default 20%) - 
autovacuum_analyze_scale_factor = 0.005, - autovacuum_vacuum_cost_delay = 2, -- more aggressive (default 2ms in PG12+) - autovacuum_vacuum_cost_limit = 1000 -); - --- Monitor transaction ID wraparound risk -SELECT datname, - age(datfrozenxid) AS xid_age, - current_setting('autovacuum_freeze_max_age')::bigint AS freeze_max -FROM pg_database -ORDER BY age(datfrozenxid) DESC; --- Danger zone: xid_age approaching 2 billion - --- Check table-level freeze age -SELECT relname, age(relfrozenxid) AS xid_age -FROM pg_class -WHERE relkind = 'r' -ORDER BY age(relfrozenxid) DESC -LIMIT 10; - --- Bloat estimation (simple) -SELECT - schemaname, relname, - n_live_tup, - n_dead_tup, - pg_size_pretty(pg_total_relation_size(relid)) AS total_size -FROM pg_stat_user_tables -WHERE n_dead_tup > 10000 -ORDER BY n_dead_tup DESC; -``` - -## Connection Pooling - -### PgBouncer - -```ini -; pgbouncer.ini -[pgbouncer] -pool_mode = transaction ; recommended for most apps -max_client_conn = 1000 -default_pool_size = 25 ; connections per user/db pair -min_pool_size = 5 -reserve_pool_size = 5 -reserve_pool_timeout = 3 -server_idle_timeout = 600 -; session mode: needed for prepared statements, LISTEN/NOTIFY, temp tables -; transaction mode: best throughput, some features unavailable -; statement mode: most restrictive, only simple queries -``` - -### pgcat (Modern Alternative) - -- Supports query-level load balancing across replicas -- Built-in sharding support -- Prometheus metrics built in -- Configuration via TOML - -## Parallel Query - -```sql --- Key configuration -SET max_parallel_workers_per_gather = 4; -- workers per query node -SET max_parallel_workers = 8; -- total across all queries -SET min_parallel_table_scan_size = '8MB'; -SET min_parallel_index_scan_size = '512kB'; -SET parallel_tuple_cost = 0.01; - --- Parallel query kicks in when: --- 1. Table is large enough (> min_parallel_table_scan_size) --- 2. Query plan benefits from parallelism --- 3. 
Not inside a transaction with serializable isolation --- 4. Not a cursor or CTE scan --- 5. No functions marked PARALLEL UNSAFE - --- Check: look for "Workers Planned/Launched" in EXPLAIN -EXPLAIN (ANALYZE) SELECT count(*) FROM large_table WHERE status = 'active'; --- Gather (Workers Planned: 4, Workers Launched: 4) --- -> Parallel Seq Scan on large_table - --- Mark functions as parallel-safe to enable parallel plans -CREATE FUNCTION my_func(x int) RETURNS int -LANGUAGE sql PARALLEL SAFE IMMUTABLE -AS $$ SELECT x * 2 $$; -``` - -## Common Bottlenecks and Fixes - -### Sequential Scans on Large Tables - -```sql --- Identify seq scans -SELECT relname, seq_scan, idx_scan, - seq_scan - idx_scan AS too_many_seqs -FROM pg_stat_user_tables -WHERE seq_scan > idx_scan -ORDER BY seq_scan DESC; -``` - -### Lock Contention - -```sql --- Find blocking queries -SELECT - blocked.pid AS blocked_pid, - blocked.query AS blocked_query, - blocked.wait_event_type, - blocking.pid AS blocking_pid, - blocking.query AS blocking_query, - now() - blocked.query_start AS blocked_duration -FROM pg_stat_activity blocked -JOIN pg_locks bl ON bl.pid = blocked.pid AND NOT bl.granted -JOIN pg_locks kl ON kl.locktype = bl.locktype - AND kl.database IS NOT DISTINCT FROM bl.database - AND kl.relation IS NOT DISTINCT FROM bl.relation - AND kl.page IS NOT DISTINCT FROM bl.page - AND kl.tuple IS NOT DISTINCT FROM bl.tuple - AND kl.pid != bl.pid - AND kl.granted -JOIN pg_stat_activity blocking ON kl.pid = blocking.pid; - --- Advisory locks for application-level locking -SELECT pg_advisory_lock(hashtext('my_job_name')); -- blocks until acquired -SELECT pg_try_advisory_lock(12345); -- non-blocking, returns bool -SELECT pg_advisory_unlock(12345); -``` - -### Memory Tuning Checklist - -```ini -# postgresql.conf -shared_buffers = '4GB' # 25% of RAM (start here) -effective_cache_size = '12GB' # 75% of RAM (tells planner about OS cache) -work_mem = '64MB' # per-sort; total = work_mem * sorts * connections 
-maintenance_work_mem = '1GB' # for VACUUM, CREATE INDEX -huge_pages = try # reduces TLB misses on Linux -``` diff --git a/plugins/flow/skills/postgres/references/plpgsql.md b/plugins/flow/skills/postgres/references/plpgsql.md deleted file mode 100644 index 2d7c71b..0000000 --- a/plugins/flow/skills/postgres/references/plpgsql.md +++ /dev/null @@ -1,332 +0,0 @@ -# PL/pgSQL Development - -## Functions - -```sql --- Basic function returning a scalar -CREATE OR REPLACE FUNCTION get_user_balance(p_user_id bigint) -RETURNS numeric -LANGUAGE plpgsql -STABLE -- or VOLATILE, IMMUTABLE -AS $$ -DECLARE - v_balance numeric; -BEGIN - SELECT balance INTO v_balance - FROM accounts - WHERE user_id = p_user_id; - - IF NOT FOUND THEN - RAISE EXCEPTION 'User % not found', p_user_id - USING ERRCODE = 'P0002'; - END IF; - - RETURN v_balance; -END; -$$; -``` - -## Procedures (PG11+) - -```sql --- Procedures can manage transactions (COMMIT/ROLLBACK inside) -CREATE OR REPLACE PROCEDURE transfer_funds( - p_from bigint, - p_to bigint, - p_amount numeric -) -LANGUAGE plpgsql -AS $$ -BEGIN - UPDATE accounts SET balance = balance - p_amount WHERE user_id = p_from; - UPDATE accounts SET balance = balance + p_amount WHERE user_id = p_to; - - -- Can commit mid-procedure - COMMIT; -END; -$$; - -CALL transfer_funds(1, 2, 100.00); -``` - -## Variable Declaration - -```sql -DECLARE - v_count integer := 0; - v_name text; - v_row users%ROWTYPE; -- full row type from table - v_email users.email%TYPE; -- column type - v_record record; -- untyped, assigned at runtime - v_arr integer[] := ARRAY[1,2,3]; - v_const constant text := 'fixed'; -``` - -## Control Flow - -### IF / ELSIF / ELSE - -```sql -IF v_status = 'active' THEN - v_rate := 0.05; -ELSIF v_status = 'premium' THEN - v_rate := 0.02; -ELSE - v_rate := 0.10; -END IF; -``` - -### CASE - -```sql --- Simple CASE -CASE v_status - WHEN 'active' THEN v_label := 'Active'; - WHEN 'inactive' THEN v_label := 'Inactive'; - ELSE v_label := 'Unknown'; -END 
CASE; - --- Searched CASE -CASE - WHEN v_amount > 1000 THEN v_tier := 'high'; - WHEN v_amount > 100 THEN v_tier := 'medium'; - ELSE v_tier := 'low'; -END CASE; -``` - -### Loops - -```sql --- Basic LOOP with EXIT -LOOP - FETCH cur INTO v_row; - EXIT WHEN NOT FOUND; - -- process v_row -END LOOP; - --- FOR loop over integer range -FOR i IN 1..10 LOOP - RAISE NOTICE 'Iteration %', i; -END LOOP; - --- FOR loop over query results -FOR v_rec IN SELECT id, name FROM users WHERE active LOOP - RAISE NOTICE 'User: % %', v_rec.id, v_rec.name; -END LOOP; - --- FOREACH over array -FOREACH v_element IN ARRAY v_arr LOOP - RAISE NOTICE 'Element: %', v_element; -END LOOP; - --- WHILE loop -WHILE v_count < 100 LOOP - v_count := v_count + 1; -END LOOP; -``` - -## Exception Handling - -```sql -BEGIN - INSERT INTO orders (id, total) VALUES (p_id, p_total); -EXCEPTION - WHEN unique_violation THEN - RAISE NOTICE 'Order % already exists, updating', p_id; - UPDATE orders SET total = p_total WHERE id = p_id; - WHEN foreign_key_violation THEN - RAISE EXCEPTION 'Invalid reference in order %', p_id; - WHEN OTHERS THEN - -- Capture full error context - DECLARE - v_msg text; - v_detail text; - v_hint text; - BEGIN - GET STACKED DIAGNOSTICS - v_msg = MESSAGE_TEXT, - v_detail = PG_EXCEPTION_DETAIL, - v_hint = PG_EXCEPTION_HINT; - RAISE WARNING 'Unexpected error: % (detail: %, hint: %)', - v_msg, v_detail, v_hint; - RAISE; -- re-raise original exception - END; -END; -``` - -### RAISE Levels - -```sql -RAISE DEBUG 'detailed debug info: %', v_data; -RAISE LOG 'server log message'; -RAISE NOTICE 'informational: %', v_info; -- most common for dev -RAISE WARNING 'something unexpected: %', v_msg; -RAISE EXCEPTION 'fatal: %', v_msg - USING ERRCODE = 'P0001', - DETAIL = 'More context here', - HINT = 'Try doing X instead'; -``` - -## Set-Returning Functions - -```sql --- RETURN NEXT (row-by-row) -CREATE OR REPLACE FUNCTION active_users_with_orders() -RETURNS SETOF record -LANGUAGE plpgsql -AS $$ -DECLARE - 
v_rec record; -BEGIN - FOR v_rec IN - SELECT u.id, u.name, count(o.id) AS order_count - FROM users u JOIN orders o ON o.user_id = u.id - WHERE u.active - GROUP BY u.id, u.name - LOOP - RETURN NEXT v_rec; - END LOOP; -END; -$$; - --- RETURN QUERY (preferred, simpler) -CREATE OR REPLACE FUNCTION get_active_users() -RETURNS TABLE(user_id bigint, user_name text) -LANGUAGE plpgsql -STABLE -AS $$ -BEGIN - RETURN QUERY - SELECT id, name FROM users WHERE active ORDER BY name; -END; -$$; - --- Usage -SELECT * FROM get_active_users(); -``` - -## Triggers - -```sql --- Trigger function (must return trigger) -CREATE OR REPLACE FUNCTION audit_changes() -RETURNS trigger -LANGUAGE plpgsql -AS $$ -BEGIN - IF TG_OP = 'INSERT' THEN - INSERT INTO audit_log (table_name, op, new_data, changed_at) - VALUES (TG_TABLE_NAME, 'INSERT', to_jsonb(NEW), now()); - RETURN NEW; - ELSIF TG_OP = 'UPDATE' THEN - INSERT INTO audit_log (table_name, op, old_data, new_data, changed_at) - VALUES (TG_TABLE_NAME, 'UPDATE', to_jsonb(OLD), to_jsonb(NEW), now()); - RETURN NEW; - ELSIF TG_OP = 'DELETE' THEN - INSERT INTO audit_log (table_name, op, old_data, changed_at) - VALUES (TG_TABLE_NAME, 'DELETE', to_jsonb(OLD), now()); - RETURN OLD; - END IF; -END; -$$; - --- Row-level trigger (fires once per affected row) -CREATE TRIGGER trg_orders_audit - AFTER INSERT OR UPDATE OR DELETE ON orders - FOR EACH ROW - EXECUTE FUNCTION audit_changes(); - --- Statement-level trigger (fires once per statement) -CREATE TRIGGER trg_orders_stmt - AFTER INSERT ON orders - FOR EACH STATEMENT - EXECUTE FUNCTION notify_batch_insert(); - --- BEFORE trigger to modify data before insert -CREATE OR REPLACE FUNCTION set_updated_at() -RETURNS trigger -LANGUAGE plpgsql -AS $$ -BEGIN - NEW.updated_at := now(); - RETURN NEW; -END; -$$; - -CREATE TRIGGER trg_set_updated - BEFORE INSERT OR UPDATE ON orders - FOR EACH ROW - EXECUTE FUNCTION set_updated_at(); - --- Transition tables (PG10+, statement-level only) -CREATE TRIGGER 
trg_orders_transition - AFTER INSERT ON orders - REFERENCING NEW TABLE AS new_orders - FOR EACH STATEMENT - EXECUTE FUNCTION process_new_orders(); --- Inside function: SELECT * FROM new_orders -``` - -## DO Blocks (Anonymous Code) - -```sql -DO $$ -DECLARE - v_count integer; -BEGIN - SELECT count(*) INTO v_count FROM users WHERE active; - RAISE NOTICE 'Active users: %', v_count; - - IF v_count > 1000 THEN - PERFORM pg_notify('admin', 'High user count: ' || v_count); - END IF; -END; -$$; -``` - -## Composite Types and Domains - -```sql --- Custom composite type -CREATE TYPE address AS ( - street text, - city text, - state text, - zip text -); - -CREATE TABLE customers ( - id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY, - name text NOT NULL, - home address, - work address -); - --- Access composite fields -SELECT (home).city, (work).zip FROM customers; - --- Custom domain with constraints -CREATE DOMAIN email_address AS text - CHECK (VALUE ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'); - -CREATE DOMAIN positive_int AS integer - CHECK (VALUE > 0); -``` - -## Security Definer vs Invoker - -```sql --- Runs with privileges of the function owner (like SUID) -CREATE FUNCTION admin_only_reset(p_user_id bigint) -RETURNS void -LANGUAGE plpgsql -SECURITY DEFINER -SET search_path = public -- always set search_path with SECURITY DEFINER -AS $$ -BEGIN - UPDATE users SET password_hash = NULL WHERE id = p_user_id; -END; -$$; - --- Default: SECURITY INVOKER (runs with caller's privileges) -``` diff --git a/plugins/flow/skills/postgres/references/psql.md b/plugins/flow/skills/postgres/references/psql.md deleted file mode 100644 index 2fb295f..0000000 --- a/plugins/flow/skills/postgres/references/psql.md +++ /dev/null @@ -1,134 +0,0 @@ -# psql CLI Reference - -## Connection - -```bash -# Standard connection -psql -h localhost -p 5432 -U myuser -d mydb - -# Connection string -psql "postgresql://myuser:pass@localhost:5432/mydb?sslmode=require" - -# Environment variables 
-export PGHOST=localhost PGPORT=5432 PGUSER=myuser PGDATABASE=mydb -psql -``` - -## Essential Meta-Commands - -```text -\l List databases -\c dbname Connect to database -\dt List tables -\dt+ List tables with sizes -\d tablename Describe table (columns, indexes, constraints) -\di List indexes -\df List functions -\dv List views -\dn List schemas -\du List roles -\dx List extensions -\dp List table privileges (access permissions) -``` - -## Query Execution - -```text -\i file.sql Execute SQL from file -\e Edit last query in $EDITOR -\g Re-execute last query -\watch 5 Re-execute query every 5 seconds -\timing on Show query execution time -\x auto Toggle expanded output (auto = when wide) -``` - -## Output Formatting - -```text -\pset format csv Output as CSV -\pset format html Output as HTML -\copy table TO '/tmp/data.csv' CSV HEADER -\copy table FROM '/tmp/data.csv' CSV HEADER - --- Redirect query output to a file (uses the current \pset format; set csv first for CSV) -\o /tmp/output.csv -SELECT * FROM users; -\o -``` - -## Transaction Control - -```text -\set AUTOCOMMIT off -BEGIN; --- ... statements ... 
-COMMIT; --- or ROLLBACK; -``` - -## Variables & Scripting - -```bash -# Pass variables from command line -psql -v tenant_id=42 -f query.sql - -# In SQL file: -SELECT * FROM users WHERE tenant_id = :tenant_id; -SELECT * FROM users WHERE name = :'name_var'; -- quoted -``` - -```text --- Inside psql -\set my_table users -SELECT * FROM :my_table WHERE id = 1; -\echo :my_table -``` - -## Conditional Logic in psql Scripts - -```sql -\if :is_production - SET statement_timeout = '30s'; -\else - SET statement_timeout = '0'; -\endif -``` - -## .psqlrc Customization - -```sql --- ~/.psqlrc -\set QUIET 1 -\pset null '(null)' -\set HISTSIZE 10000 -\set HISTCONTROL ignoredups -\timing on -\x auto -\set PROMPT1 '%[%033[1;32m%]%n@%/%[%033[0m%]%R%# ' -\set PROMPT2 '%R%# ' - --- Handy shortcuts -\set activity 'SELECT pid, now() - pg_stat_activity.query_start AS duration, query, state FROM pg_stat_activity WHERE state != \'idle\' ORDER BY duration DESC;' -\set locks 'SELECT pid, locktype, relation::regclass, mode, granted FROM pg_locks WHERE relation IS NOT NULL ORDER BY relation;' -\set sizes 'SELECT relname, pg_size_pretty(pg_total_relation_size(relid)) AS total FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC LIMIT 20;' - -\set QUIET 0 -\echo 'Custom psqlrc loaded. 
Shortcuts: :activity :locks :sizes' -``` - -## Useful One-Liners - -```bash -# Quick query from shell -psql -d mydb -c "SELECT count(*) FROM users" - -# Tab-separated output (for scripting) -psql -d mydb -t -A -F$'\t' -c "SELECT id, name FROM users" - -# Execute and exit -psql -d mydb -f migrate.sql -v ON_ERROR_STOP=1 - -# Parallel dump/restore -pg_dump -Fd -j4 -d mydb -f /backup/mydb_dir -pg_restore -Fd -j4 -d mydb_new /backup/mydb_dir -``` diff --git a/plugins/flow/skills/postgres/references/queries.md b/plugins/flow/skills/postgres/references/queries.md deleted file mode 100644 index c85b829..0000000 --- a/plugins/flow/skills/postgres/references/queries.md +++ /dev/null @@ -1,152 +0,0 @@ -# Advanced SQL Patterns - -## Common Table Expressions (CTEs) - -```sql --- Basic CTE -WITH active_users AS ( - SELECT id, name, email - FROM users - WHERE status = 'active' - AND last_login > NOW() - INTERVAL '30 days' -) -SELECT au.name, COUNT(o.id) AS order_count -FROM active_users au -JOIN orders o ON o.user_id = au.id -GROUP BY au.name; -``` - -## Recursive CTEs - -```sql --- Org hierarchy traversal -WITH RECURSIVE org_tree AS ( - -- Base case: top-level managers - SELECT id, name, manager_id, 1 AS depth - FROM employees - WHERE manager_id IS NULL - - UNION ALL - - -- Recursive step - SELECT e.id, e.name, e.manager_id, ot.depth + 1 - FROM employees e - JOIN org_tree ot ON e.manager_id = ot.id -) -SELECT * FROM org_tree ORDER BY depth, name; -``` - -## Window Functions - -```sql --- Running total and row numbering -SELECT - date, - amount, - SUM(amount) OVER (ORDER BY date) AS running_total, - ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY date DESC) AS rn, - LAG(amount) OVER (PARTITION BY customer_id ORDER BY date) AS prev_amount, - RANK() OVER (ORDER BY amount DESC) AS amount_rank -FROM transactions; - --- Percent of total -SELECT - department, - salary, - salary::numeric / SUM(salary) OVER () * 100 AS pct_of_total, - salary::numeric / SUM(salary) OVER (PARTITION BY 
department) * 100 AS pct_of_dept -FROM employees; -``` - -## JSONB Operations - -```sql --- Access nested fields -SELECT - data->>'name' AS name, -- text extraction - data->'address'->>'city' AS city, -- nested access - data#>>'{tags,0}' AS first_tag, -- path extraction - jsonb_array_length(data->'tags') AS tag_count -FROM documents; - --- JSONB containment and existence -SELECT * FROM documents WHERE data @> '{"status": "active"}'; -SELECT * FROM documents WHERE data ? 'email'; -- key exists -SELECT * FROM documents WHERE data ?| ARRAY['a','b']; -- any key exists -SELECT * FROM documents WHERE data ?& ARRAY['a','b']; -- all keys exist - --- JSONB aggregation -SELECT jsonb_agg(name) AS names FROM users WHERE active; -SELECT jsonb_object_agg(key, value) FROM settings; - --- Update JSONB fields -UPDATE documents -SET data = jsonb_set(data, '{status}', '"archived"') -WHERE id = 1; -``` - -## Array Operations - -```sql --- Array literals and functions -SELECT ARRAY[1, 2, 3] AS nums; -SELECT array_agg(name ORDER BY name) FROM users; - --- Array operators -SELECT * FROM posts WHERE tags @> ARRAY['postgres']; -- contains -SELECT * FROM posts WHERE tags && ARRAY['go', 'rust']; -- overlap (any) -SELECT * FROM posts WHERE 'sql' = ANY(tags); -- element in array -SELECT unnest(tags) AS tag FROM posts; -- expand array to rows -``` - -## LATERAL Joins - -```sql --- Get top-3 orders per customer -SELECT c.name, top_orders.* -FROM customers c -CROSS JOIN LATERAL ( - SELECT o.id, o.total, o.created_at - FROM orders o - WHERE o.customer_id = c.id - ORDER BY o.total DESC - LIMIT 3 -) AS top_orders; -``` - -## Upsert (INSERT ... 
ON CONFLICT) - -```sql -INSERT INTO metrics (key, value, updated_at) -VALUES ('page_views', 1, NOW()) -ON CONFLICT (key) -DO UPDATE SET - value = metrics.value + EXCLUDED.value, - updated_at = EXCLUDED.updated_at; -``` - -## FILTER Clause for Conditional Aggregation - -```sql -SELECT - COUNT(*) AS total, - COUNT(*) FILTER (WHERE status = 'active') AS active, - COUNT(*) FILTER (WHERE status = 'inactive') AS inactive, - AVG(amount) FILTER (WHERE amount > 0) AS avg_positive -FROM accounts; -``` - -## GROUPING SETS / ROLLUP / CUBE - -```sql -SELECT region, product, SUM(sales) -FROM orders -GROUP BY GROUPING SETS ( - (region, product), - (region), - (product), - () -); --- ROLLUP(region, product) = (region, product), (region), () --- CUBE(region, product) = all combinations -``` diff --git a/plugins/flow/skills/postgres/references/replication.md b/plugins/flow/skills/postgres/references/replication.md deleted file mode 100644 index be3027e..0000000 --- a/plugins/flow/skills/postgres/references/replication.md +++ /dev/null @@ -1,300 +0,0 @@ -# Replication & High Availability - -## Streaming Replication (Physical) - -### Primary Configuration - -```ini -# postgresql.conf on primary -wal_level = replica # or 'logical' for logical replication -max_wal_senders = 10 # max concurrent standby connections -wal_keep_size = '1GB' # retain WAL for slow standbys (PG13+) -hot_standby = on # allow read queries on standby -``` - -```text -# pg_hba.conf — allow replication connections -host replication replicator 10.0.0.0/8 scram-sha-256 -``` - -```sql --- Create replication role -CREATE ROLE replicator WITH REPLICATION LOGIN PASSWORD 'strong_password'; -``` - -### Standby Setup - -```bash -# Create base backup from primary -pg_basebackup -h primary-host -U replicator -D /var/lib/postgresql/data \ - -Fp -Xs -P -R -# -R creates standby.signal and sets primary_conninfo in postgresql.auto.conf - -# Verify standby.signal exists -ls /var/lib/postgresql/data/standby.signal - -# Start standby 
-pg_ctl start -D /var/lib/postgresql/data -``` - -```ini -# postgresql.auto.conf on standby (created by -R flag) -primary_conninfo = 'host=primary-host port=5432 user=replicator password=strong_password' -``` - -### Synchronous Replication - -```ini -# postgresql.conf on primary -synchronous_standby_names = 'FIRST 1 (standby1, standby2)' -# Modes: -# FIRST N (s1, s2, s3) — wait for N standbys in priority order -# ANY N (s1, s2, s3) — wait for any N standbys -# '*' — any single standby - -synchronous_commit = on # on = wait for standby WAL flush - # remote_apply = wait for standby to apply - # remote_write = wait for standby OS write -``` - -### Monitor Replication - -```sql --- On primary: check replication status -SELECT client_addr, application_name, state, - sent_lsn, write_lsn, flush_lsn, replay_lsn, - pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag_bytes, - reply_time -FROM pg_stat_replication; - --- On standby: check recovery status -SELECT pg_is_in_recovery(), - pg_last_wal_receive_lsn(), - pg_last_wal_replay_lsn(), - pg_last_xact_replay_timestamp(), - now() - pg_last_xact_replay_timestamp() AS replication_delay; -``` - -## Logical Replication - -Replicates at the row level (not WAL bytes). Allows selective table replication and cross-version replication. 
- -```ini -# postgresql.conf on publisher -wal_level = logical -max_replication_slots = 10 -max_wal_senders = 10 -``` - -### Publisher (Source) - -```sql --- Publish specific tables -CREATE PUBLICATION my_pub FOR TABLE users, orders; - --- Publish all tables in a schema (PG15+) -CREATE PUBLICATION schema_pub FOR TABLES IN SCHEMA public; - --- Publish all tables -CREATE PUBLICATION all_pub FOR ALL TABLES; - --- Publish with row filter (PG15+) -CREATE PUBLICATION filtered_pub FOR TABLE orders - WHERE (status = 'active' AND region = 'US'); - --- Publish specific columns (PG15+) -CREATE PUBLICATION partial_pub FOR TABLE users (id, name, email); -``` - -### Subscriber (Target) - -```sql --- Create matching tables first (schema not replicated) - --- Subscribe -CREATE SUBSCRIPTION my_sub - CONNECTION 'host=publisher-host port=5432 dbname=mydb user=replicator password=secret' - PUBLICATION my_pub - WITH (copy_data = true); -- initial data sync - --- Monitor subscription -SELECT * FROM pg_stat_subscription; - --- Check replication slot on publisher -SELECT slot_name, active, restart_lsn, confirmed_flush_lsn -FROM pg_replication_slots; - --- Alter subscription -ALTER SUBSCRIPTION my_sub DISABLE; -ALTER SUBSCRIPTION my_sub ENABLE; -ALTER SUBSCRIPTION my_sub REFRESH PUBLICATION; -- pick up new tables - --- Drop subscription (also drops replication slot on publisher) -DROP SUBSCRIPTION my_sub; -``` - -## pg_basebackup - -```bash -# Full backup in plain format -pg_basebackup -h primary -U replicator -D /backup/base \ - -Fp -Xs -P -c fast - -# Compressed tar format -pg_basebackup -h primary -U replicator -D /backup/base \ - -Ft -z -P -Xs - -# Flags: -# -Fp plain format (directory) -# -Ft tar format -# -z gzip compression (tar only) -# -Xs stream WAL during backup (recommended) -# -P show progress -# -c fast fast checkpoint -# -R create standby.signal + primary_conninfo -``` - -## Failover - -### Manual Failover - -```sql --- On standby: promote to primary -SELECT pg_promote(); 
--- Or: pg_ctl promote -D /var/lib/postgresql/data -``` - -### Patroni (Automated HA) - -```yaml -# patroni.yml (simplified) -scope: my-cluster -name: node1 - -restapi: - listen: 0.0.0.0:8008 - connect_address: node1:8008 - -etcd: - hosts: etcd1:2379,etcd2:2379,etcd3:2379 - -bootstrap: - dcs: - ttl: 30 - loop_wait: 10 - retry_timeout: 10 - maximum_lag_on_failover: 1048576 # bytes - synchronous_mode: true - postgresql: - use_pg_rewind: true - parameters: - max_connections: 200 - shared_buffers: 4GB - wal_level: replica - - initdb: - - encoding: UTF8 - - data-checksums - -postgresql: - listen: 0.0.0.0:5432 - connect_address: node1:5432 - data_dir: /var/lib/postgresql/data - authentication: - superuser: - username: postgres - password: secret - replication: - username: replicator - password: secret -``` - -```bash -# Patroni commands -patronictl -c /etc/patroni.yml list # show cluster status -patronictl -c /etc/patroni.yml switchover # planned switchover -patronictl -c /etc/patroni.yml failover # manual failover -patronictl -c /etc/patroni.yml reinit node2 # reinitialize a member -``` - -### pg_auto_failover - -```bash -# Monitor node -pg_autoctl create monitor --pgdata /var/lib/monitor --pgport 5000 - -# Primary node -pg_autoctl create postgres --pgdata /var/lib/pg --pgport 5432 \ - --monitor postgres://autoctl@monitor-host:5000/pg_auto_failover - -# Secondary node (auto-joins and syncs) -pg_autoctl create postgres --pgdata /var/lib/pg --pgport 5432 \ - --monitor postgres://autoctl@monitor-host:5000/pg_auto_failover - -# Check state -pg_autoctl show state -``` - -## PgBouncer + Patroni (Connection Routing) - -```ini -; pgbouncer.ini -[databases] -; Use Patroni REST API or DNS for routing -mydb = host=primary-vip port=5432 dbname=mydb -mydb_ro = host=standby1,standby2 port=5432 dbname=mydb - -[pgbouncer] -pool_mode = transaction -max_client_conn = 1000 -default_pool_size = 25 -``` - -Alternative: Use Patroni's built-in HAProxy or consul-template to update PgBouncer 
config on failover. - -## WAL Archiving and PITR - -### Configure Archiving - -```ini -# postgresql.conf -archive_mode = on -archive_command = 'cp %p /archive/wal/%f' -# Or use pgbackrest, WAL-G, barman: -# archive_command = 'pgbackrest --stanza=mydb archive-push %p' -``` - -### Point-in-Time Recovery - -```bash -# 1. Stop PostgreSQL -pg_ctl stop -D /var/lib/postgresql/data - -# 2. Restore base backup -rm -rf /var/lib/postgresql/data -pg_basebackup ... OR tar xzf base_backup.tar.gz -C /var/lib/postgresql/data - -# 3. Create recovery configuration -cat > /var/lib/postgresql/data/postgresql.auto.conf << EOF -restore_command = 'cp /archive/wal/%f %p' -recovery_target_time = '2026-03-25 14:30:00 UTC' -recovery_target_action = 'promote' -EOF - -# 4. Create recovery signal -touch /var/lib/postgresql/data/recovery.signal - -# 5. Start PostgreSQL (replays WAL to target time) -pg_ctl start -D /var/lib/postgresql/data -``` - -```sql --- Recovery target options (choose one): --- recovery_target_time = '2026-03-25 14:30:00' --- recovery_target_xid = '12345' --- recovery_target_lsn = '0/1A2B3C4D' --- recovery_target_name = 'my_restore_point' - --- Create named restore points -SELECT pg_create_restore_point('before_migration'); -``` diff --git a/plugins/flow/skills/postgres/references/security.md b/plugins/flow/skills/postgres/references/security.md deleted file mode 100644 index 7ccf1ff..0000000 --- a/plugins/flow/skills/postgres/references/security.md +++ /dev/null @@ -1,269 +0,0 @@ -# Security - -## Role Management - -```sql --- Create a login role -CREATE ROLE app_user WITH LOGIN PASSWORD 'strong_password' - VALID UNTIL '2027-01-01' - CONNECTION LIMIT 50; - --- Create a group role (no login) -CREATE ROLE readonly NOLOGIN; -CREATE ROLE readwrite NOLOGIN; - --- Grant privileges to group roles -GRANT CONNECT ON DATABASE mydb TO readonly; -GRANT USAGE ON SCHEMA public TO readonly; -GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly; -ALTER DEFAULT PRIVILEGES IN SCHEMA 
public GRANT SELECT ON TABLES TO readonly; - -GRANT readonly TO readwrite; -- readwrite inherits readonly -GRANT INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO readwrite; -ALTER DEFAULT PRIVILEGES IN SCHEMA public - GRANT INSERT, UPDATE, DELETE ON TABLES TO readwrite; - --- Assign group roles to login roles -GRANT readonly TO app_reader; -GRANT readwrite TO app_writer; - --- Role inheritance (default: INHERIT) --- With INHERIT, member automatically gets parent's privileges --- With NOINHERIT, must SET ROLE explicitly -CREATE ROLE admin NOLOGIN; -GRANT admin TO dba_user; --- dba_user must: SET ROLE admin; to use admin privileges - --- Revoke -REVOKE ALL ON SCHEMA sensitive FROM PUBLIC; -REVOKE INSERT ON large_table FROM app_writer; - --- Check role membership -SELECT r.rolname AS role, - m.rolname AS member -FROM pg_auth_members am -JOIN pg_roles r ON r.oid = am.roleid -JOIN pg_roles m ON m.oid = am.member; -``` - -## Row-Level Security (RLS) - -```sql --- Enable RLS on table (must be explicit) -ALTER TABLE documents ENABLE ROW LEVEL SECURITY; - --- Force RLS even for table owner (optional) -ALTER TABLE documents FORCE ROW LEVEL SECURITY; - --- Tenant isolation policy -CREATE POLICY tenant_isolation ON documents - USING (tenant_id = current_setting('app.tenant_id')::int); --- Set before queries: SET app.tenant_id = '42'; - --- Per-command policies -CREATE POLICY select_own ON documents - FOR SELECT - USING (owner_id = current_user_id()); - -CREATE POLICY insert_own ON documents - FOR INSERT - WITH CHECK (owner_id = current_user_id()); - -CREATE POLICY update_own ON documents - FOR UPDATE - USING (owner_id = current_user_id()) -- which rows can be seen - WITH CHECK (owner_id = current_user_id()); -- which rows can be written - -CREATE POLICY delete_own ON documents - FOR DELETE - USING (owner_id = current_user_id()); - --- Admin bypass policy (permissive policies OR together) -CREATE POLICY admin_all ON documents - FOR ALL - TO admin_role - USING (true) - 
WITH CHECK (true); - --- Restrictive policy (AND with permissive policies, PG10+) -CREATE POLICY active_only ON documents AS RESTRICTIVE - FOR ALL - USING (NOT is_deleted); - --- Multiple USING policies of same type: PERMISSIVE policies OR together, --- then AND with any RESTRICTIVE policies - --- Helper function for RLS -CREATE FUNCTION current_user_id() RETURNS bigint -LANGUAGE sql STABLE -AS $$ SELECT current_setting('app.user_id')::bigint $$; -``` - -## Column-Level Privileges - -```sql --- Grant SELECT on specific columns only -GRANT SELECT (id, name, email) ON users TO app_public; - --- Deny access to sensitive columns -REVOKE SELECT ON users FROM app_public; -GRANT SELECT (id, name, department) ON users TO app_public; --- app_public cannot see: email, salary, ssn, etc. - --- Security barrier views (prevent optimizer from leaking data) -CREATE VIEW public_users WITH (security_barrier) AS - SELECT id, name, department - FROM users - WHERE NOT is_deleted; - -GRANT SELECT ON public_users TO app_public; --- security_barrier prevents filter pushdown that could leak hidden rows --- via side-channel (e.g., function that raises error on certain values) -``` - -## SSL/TLS Configuration - -### Server Side (postgresql.conf) - -```ini -ssl = on -ssl_cert_file = '/etc/ssl/server.crt' -ssl_key_file = '/etc/ssl/server.key' -ssl_ca_file = '/etc/ssl/ca.crt' # for client cert verification -ssl_min_protocol_version = 'TLSv1.2' -``` - -### pg_hba.conf (Require SSL) - -```text -# Require SSL for all remote connections -hostssl all all 0.0.0.0/0 scram-sha-256 -hostssl all all ::/0 scram-sha-256 - -# Require client certificates -hostssl all all 0.0.0.0/0 cert clientcert=verify-full -``` - -### Client Certificates - -```bash -# Generate client certificate -openssl req -new -key client.key -out client.csr -subj "/CN=app_user" -openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial \ - -out client.crt -days 365 - -# Connect with client cert -psql 
"postgresql://app_user@host/db?sslmode=verify-full&sslcert=client.crt&sslkey=client.key&sslrootcert=ca.crt" -``` - -## Password Authentication - -```sql --- scram-sha-256 (recommended, default in PG14+) --- Set in postgresql.conf: --- password_encryption = 'scram-sha-256' - --- pg_hba.conf: --- host all all 0.0.0.0/0 scram-sha-256 - --- Change password -ALTER ROLE app_user WITH PASSWORD 'new_strong_password'; - --- Password expiration -ALTER ROLE app_user VALID UNTIL '2027-06-01'; - --- Check password encryption -SELECT rolname, rolpassword ~ '^SCRAM-SHA-256' AS is_scram -FROM pg_authid -WHERE rolcanlogin; -``` - -## pgAudit (Audit Logging) - -```sql --- Install --- shared_preload_libraries = 'pgaudit' (requires restart) -CREATE EXTENSION pgaudit; - --- Session-based logging (all DDL and role changes) -SET pgaudit.log = 'ddl, role'; - --- Role-based auditing (audit specific roles) -CREATE ROLE auditor NOLOGIN; -SET pgaudit.role = 'auditor'; - --- Grant auditor access to tables you want to audit -GRANT SELECT, INSERT, UPDATE, DELETE ON orders TO auditor; --- Now all DML on orders by any user is logged - --- Object-based auditing -SET pgaudit.log = 'write, ddl'; --- Logs: INSERT, UPDATE, DELETE, TRUNCATE, and all DDL - --- Log classes: --- read - SELECT, COPY FROM --- write - INSERT, UPDATE, DELETE, TRUNCATE, COPY TO --- function - function calls and DO blocks --- role - GRANT, REVOKE, CREATE/ALTER/DROP ROLE --- ddl - all DDL not in other classes --- misc - DISCARD, FETCH, CHECKPOINT, VACUUM, SET --- all - everything -``` - -## pgcrypto (Encryption) - -```sql -CREATE EXTENSION pgcrypto; - --- Hashing -SELECT crypt('mypassword', gen_salt('bf', 10)); -- bcrypt --- Result: '$2a$10$...' 
- --- Verify password -SELECT crypt('mypassword', stored_hash) = stored_hash AS valid; - --- SHA-256 hashing -SELECT encode(digest('data', 'sha256'), 'hex'); - --- Symmetric encryption (AES) --- Encrypt -UPDATE sensitive_data SET encrypted_col = - pgp_sym_encrypt(secret_value, 'encryption_key'); - --- Decrypt -SELECT pgp_sym_decrypt(encrypted_col, 'encryption_key') -FROM sensitive_data; - --- Generate random bytes/UUIDs -SELECT gen_random_uuid(); -- built-in PG13+ -SELECT encode(gen_random_bytes(32), 'hex'); -- random token - --- HMAC -SELECT encode(hmac('message', 'secret_key', 'sha256'), 'hex'); -``` - -## Security Best Practices Checklist - -```sql --- 1. Revoke default public access -REVOKE CREATE ON SCHEMA public FROM PUBLIC; -REVOKE ALL ON DATABASE mydb FROM PUBLIC; - --- 2. Use separate roles for app, admin, migrations --- app_role: minimal DML privileges --- migration_role: DDL privileges --- admin_role: superuser-like, used rarely - --- 3. Set statement_timeout to prevent long-running queries -ALTER ROLE app_user SET statement_timeout = '30s'; - --- 4. Restrict function execution -REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; -GRANT EXECUTE ON FUNCTION safe_function() TO app_role; - --- 5. Check for superusers (minimize these) -SELECT rolname FROM pg_roles WHERE rolsuper; - --- 6. Audit default privileges -SELECT * FROM pg_default_acl; -``` diff --git a/plugins/flow/skills/pyapp/SKILL.md b/plugins/flow/skills/pyapp/SKILL.md deleted file mode 100644 index 3e9342b..0000000 --- a/plugins/flow/skills/pyapp/SKILL.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -name: pyapp -description: "Use when building standalone Python executables with PyApp, bundling Python runtimes, preparing air-gapped or multi-architecture binaries, patching PyApp defaults, or compiling single-binary assets." 
---- - -# PyApp Standalone Binaries - -Enable building self-contained, air-gapped, multi-architecture standalone executables for any Python application using **PyApp** and **uv**. - ---- - -## Overview - -Standard `pyapp` installation bootstraps the environment on first run, which usually requires internet access. For **air-gapped** or **network-isolated** environments, you must embed the entire Python distribution and its dependencies ahead of time. - -This skill documents the **Bundle-Patch-Compile** workflow: - -1. **Bundle**: Download a standalone Python build, install dependencies into its `site-packages`, and repackage. -2. **Patch**: Modify the PyApp source code to enforce custom install locations or isolation defaults. -3. **Compile**: Compile the patched PyApp binary with the bundled distribution embedded. - ---- - -## Architecture & Philosophy - -### The Packaged Distribution - -Instead of installing at runtime, we build a **hybrid distribution**: - -* A basic standalone Python distribution (e.g., from `python-build-standalone`). -* Pre-populated `site-packages` via `uv pip install --target`. -* This avoids running any package managers on first execution. - ---- - - - -## Configuration - -### 1. Standard Settings - -In your `pyproject.toml`, configure the Hatch target or custom builder to use specific variables. - - - -```toml -[tool.hatch.build.targets.binary] -scripts = ["myapp"] -pyapp-version = "v0.29.0" - -[tool.hatch.build.targets.binary.env] -PYAPP_DISTRIBUTION_EMBED = "1" -PYAPP_FULL_ISOLATION = "1" -PYAPP_ALLOW_UPDATES = "1" -``` - - - ---- - -## Step-by-Step Workflow - -### Phase 1: Bundling (Prep the Runtime) - -To enable fully offline operations, follow these steps using an automation script (see `scripts/bundler.py`): - -1. **Download Standalone Python**: Acquire a compatible `install_only_stripped` version for the Target Rust arch (e.g., `x86_64-unknown-linux-gnu`). -2. 
**Install Deps Off-Target**: Use `uv pip install` with specific cross-compilation flags: - * `--target ` - * `--python-platform ` - * `--upgrade` -3. **Repackage**: Compress the resulting layout back into a `.tar.gz`. - -### Phase 2: PyApp Patching (Enforce Paths) - -By default, PyApp stores user data in standard local data folders. If you require strict isolation (e.g., `~/.myapp`), you can **patch the PyApp source code** just before `cargo build`: - - - -```python -# Conceptual example of patching src/app.rs -import re -content = app_rs.read_text() -pattern = re.compile(r"platform_dirs\(\)\s*\.data_local_dir\(\)...") -replacement = "std::path::PathBuf::from(\"~/.myapp\")" -app_rs.write_text(pattern.sub(replacement, content)) -``` - - - -### Phase 3: Compiling - -To maintain maximum glibc backward-compatibility (e.g., supporting RHEL 7+ / manylinux2014 baseline): - -* Use **Zig** as the linker trigger: `cargo zigbuild --release --target .2.17` - - - ---- - -## CI/CD Integration - -Ensure your GitHub Action includes: - -1. An upstream build step creating target-agnostic `.whl` files. -2. A cross-target build matrix (`x86_64-linux-gnu`, `aarch64-linux-gnu`, `aarch64-apple-darwin`, etc.). -3. Zig setup steps for robust glibc pin targeting. - -> [!TIP] -> Always test inside a non-networked container: -> `docker run --network none -v $(pwd):/app ubuntu:20.04 /app/myapp-binary --help` - ---- - -## Provided Resources - -* **Bundler Template**: `scripts/bundler.py` (in this skill directory) -* **CI Matrix Action Example**: `examples/release-action.yml` (in this skill directory) - -## Shared Styleguide Baseline - -* Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-* [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -* [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -* [Docker](https://github.com/cofin/flow/blob/main/templates/styleguides/tools/docker.md) -* Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -* **Use non-root user in production images** -- When containerizing the resulting binary, ensure it runs as a non-privileged user to minimize security risks. -* **Prefer multi-stage Docker builds** -- Separate the build environment (with Cargo and Zig) from the final runtime image to keep the production artifact small. -* **Target specific glibc versions with Zig** -- Use `cargo zigbuild --target .2.17` to ensure compatibility with older Linux distributions (e.g., RHEL 7+). -* **Embed all dependencies for air-gapped use** -- Set `PYAPP_DISTRIBUTION_EMBED = "1"` to ensure the binary is fully self-contained and does not require internet access on first run. -* **Validate binary size** -- Monitor the size of the embedded distribution; strip unnecessary symbols and files (e.g., `.pyc`, `__pycache__`, tests) to keep the executable manageable. 
- - - -## Validation Checkpoint - -* [ ] Binary runs successfully in a network-isolated (`--network none`) environment -* [ ] glibc compatibility is verified using `ldd --version` on the target platform -* [ ] No root privileges are required to execute the binary -* [ ] All required Python dependencies are included in the embedded `site-packages` -* [ ] Binary size is within the expected range for the bundled distribution -* [ ] Custom install paths (if patched) are correctly respected by the application - diff --git a/plugins/flow/skills/pyapp/agents/openai.yaml b/plugins/flow/skills/pyapp/agents/openai.yaml deleted file mode 100644 index 68caab2..0000000 --- a/plugins/flow/skills/pyapp/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "PyApp" - short_description: "Standalone Python executables, uv runtime bundling, and air-gapped builds" diff --git a/plugins/flow/skills/pyapp/examples/release-action.yml b/plugins/flow/skills/pyapp/examples/release-action.yml deleted file mode 100644 index 7075201..0000000 --- a/plugins/flow/skills/pyapp/examples/release-action.yml +++ /dev/null @@ -1,104 +0,0 @@ -# .github/workflows/release.yml -name: Release Standalone Binaries - -on: - push: - tags: - - 'v*' - workflow_dispatch: - -jobs: - # 1. Build the standard Wheel first - build-wheel: - name: Build Python Wheel - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v5 - - name: Build - run: uv build - - uses: actions/upload-artifact@v4 - with: - name: wheels - path: dist/*.whl - - # 2. 
Build Standalone Binaries via Matrix - build-binaries: - name: Build for ${{ matrix.target }} - runs-on: ${{ matrix.os }} - needs: [build-wheel] - strategy: - fail-fast: false - matrix: - include: - - target: x86_64-unknown-linux-gnu - os: ubuntu-latest - artifact: myapp-x86_64-linux - - target: aarch64-unknown-linux-gnu - os: ubuntu-latest - artifact: myapp-aarch64-linux - - env: - PYAPP_VERSION: "v0.29.0" - PYAPP_REPO: "pyapp-source" - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Clone PyApp - run: git clone --depth 1 --branch ${{ env.PYAPP_VERSION }} https://github.com/ofek/pyapp ${{ env.PYAPP_REPO }} - - - name: Install uv - uses: astral-sh/setup-uv@v5 - - - name: Install Zig (For Linux Backward Compatibility) - if: contains(matrix.target, 'linux-gnu') - uses: goto-bus-stop/setup-zig@v2 - with: - version: 0.13.0 - - - name: Install cargo-zigbuild - if: contains(matrix.target, 'linux-gnu') - run: uv tool install cargo-zigbuild - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.target }} - - - name: Download wheel artifact - uses: actions/download-artifact@v4 - with: - name: wheels - path: dist/ - - - name: Generate requirements.txt - run: | - uv export --no-dev --no-emit-project > dist/requirements.txt - echo "${{ github.workspace }}/dist/*.whl" >> dist/requirements.txt - - - name: Bundle Python Distribution (Offline Packager) - run: | - uv run scripts/bundler.py build \ - --target ${{ matrix.target }} \ - --requirements dist/requirements.txt \ - --output dist/python-dist-${{ matrix.target }}.tar.gz \ - --pyapp-dir ${{ env.PYAPP_REPO }} \ - --install-dir "~/.myapp" - - - name: Compile PyApp Binary (Linux GNU) - if: contains(matrix.target, 'linux-gnu') - working-directory: ${{ env.PYAPP_REPO }} - env: - # Instruct PyApp to embed the bundle we just created - PYAPP_DISTRIBUTION_PATH: ${{ github.workspace }}/dist/python-dist-${{ matrix.target }}.tar.gz - PYAPP_SKIP_INSTALL: "true" - run: 
cargo zigbuild --release --target ${{ matrix.target }}.2.17 - - - name: Upload binary artifact - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.artifact }} - path: ${{ env.PYAPP_REPO }}/target/${{ matrix.target }}/release/pyapp diff --git a/plugins/flow/skills/pyapp/scripts/bundler.py b/plugins/flow/skills/pyapp/scripts/bundler.py deleted file mode 100644 index 595fddf..0000000 --- a/plugins/flow/skills/pyapp/scripts/bundler.py +++ /dev/null @@ -1,208 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# dependencies = [ -# "rich-click", -# "rich", -# "tomli; python_version < '3.11'", -# ] -# /// -"""Bundle Python dependencies into a standalone distribution for PyApp embedding.""" - -import contextlib -import os -import platform as host_platform -import re -import shutil -import subprocess -import sys -import tarfile -import tempfile -import urllib.error -import urllib.request -import zipfile -from pathlib import Path -from typing import Any - -import rich_click as click -from rich.console import Console -from rich.rule import Rule - -try: - import tomllib -except ModuleNotFoundError: # pragma: no cover - import tomli as tomllib # type: ignore[no-redef,import-not-found] - - -DEFAULT_PYTHON_VERSION = "3.13" -DEFAULT_INSTALL_ROOT = "~/.local" -DEFAULT_CACHE_DIRNAME = ".cache/bundler" - -# Target Standalone Python Distributions -# Update these URLs as new releases become available. 
-DEFAULT_URLS: dict[str, str] = { - "x86_64-unknown-linux-gnu": "https://github.com/astral-sh/python-build-standalone/releases/download/20251014/cpython-3.13.9%2B20251014-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz", - "aarch64-unknown-linux-gnu": "https://github.com/astral-sh/python-build-standalone/releases/download/20251014/cpython-3.13.9%2B20251014-aarch64-unknown-linux-gnu-install_only_stripped.tar.gz", - "x86_64-apple-darwin": "https://github.com/astral-sh/python-build-standalone/releases/download/20251014/cpython-3.13.9%2B20251014-x86_64-apple-darwin-install_only_stripped.tar.gz", - "aarch64-apple-darwin": "https://github.com/astral-sh/python-build-standalone/releases/download/20251014/cpython-3.13.9%2B20251014-aarch64-apple-darwin-install_only_stripped.tar.gz", - "x86_64-pc-windows-msvc": "https://github.com/astral-sh/python-build-standalone/releases/download/20251014/cpython-3.13.9%2B20251014-x86_64-pc-windows-msvc-install_only_stripped.tar.gz", -} - -# Cross-Installation Target Platforms for uv pip -DEFAULT_PLATFORMS: dict[str, str] = { - "x86_64-unknown-linux-gnu": "x86_64-manylinux_2_28", - "aarch64-unknown-linux-gnu": "aarch64-manylinux_2_28", - "x86_64-apple-darwin": "x86_64-apple-darwin", - "aarch64-apple-darwin": "aarch64-apple-darwin", - "x86_64-pc-windows-msvc": "x86_64-pc-windows-msvc", -} - -console = Console() - - -def left_aligned_rule(title: str, style: str = "blue") -> None: - rule = Rule(title, style=style, align="left") - console.print(rule) - - -def resolve_project_dir(project_dir: str | None) -> Path: - base = Path(project_dir or Path.cwd()).expanduser().resolve() - if not base.exists(): - msg = f"Project directory not found: {base}" - raise click.ClickException(msg) - return base - - -def load_pyproject(project_dir: Path) -> dict[str, Any]: - pyproject_path = project_dir / "pyproject.toml" - if not pyproject_path.exists(): - return {} - try: - return tomllib.loads(pyproject_path.read_text(encoding="utf-8")) - except (OSError, 
tomllib.TOMLDecodeError) as exc: - msg = f"Failed to parse {pyproject_path}: {exc}" - raise click.ClickException(msg) from exc - - -def detect_project_name(project_dir: Path, override: str | None) -> str: - if override: - return override - pyproject = load_pyproject(project_dir) - project_name = pyproject.get("project", {}).get("name") - return project_name or project_dir.name - - -def download_with_retry(url: str, dest: Path, max_retries: int = 3) -> None: - for attempt in range(max_retries): - try: - console.print(f"[blue]Downloading[/] {url}...") - urllib.request.urlretrieve(url, dest) - if dest.exists() and dest.stat().st_size > 0: - return - except urllib.error.URLError as exc: - if attempt < max_retries - 1: - console.print(f"[yellow]Warning:[/] Retry {attempt+1}: {exc}") - else: - raise click.ClickException(f"Download failed: {exc}") from exc - - -def find_site_packages(python_root: Path, target: str, python_version: str) -> Path: - if "windows" in target: - site_packages = python_root / "Lib" / "site-packages" - else: - major_minor = ".".join(python_version.split(".")[:2]) - site_packages = python_root / "lib" / f"python{major_minor}" / "site-packages" - - if site_packages.exists(): - return site_packages - - for root, dirs, _ in os.walk(python_root): - if "site-packages" in dirs: - return Path(root) / "site-packages" - - raise click.ClickException("Could not locate site-packages") - - -def install_requirements(requirements_path: Path, site_packages: Path, platform: str, python_version: str) -> None: - pip_cmd = [ - "uv", "pip", "install", "--color", "never", - "-r", str(requirements_path), - "--target", str(site_packages), - "--python-platform", platform, - "--python-version", python_version, - "--upgrade", "--no-deps" - ] - try: - subprocess.check_call(pip_cmd) - except subprocess.CalledProcessError as exc: - raise click.ClickException(f"pip install failed: {exc}") from exc - - -def patch_pyapp_install_dir(pyapp_dir: Path, install_dir: str) -> None: - 
"""Patch PyApp to use a custom default installation directory.""" - app_rs = pyapp_dir / "src" / "app.rs" - if not app_rs.exists(): - raise click.ClickException(f"PyApp source not found at {app_rs}") - - content = app_rs.read_text(encoding="utf-8") - pattern = re.compile( - r"platform_dirs\(\)\s*\.data_local_dir\(\)\s*" - r"\.join\(project_name\(\)\)" - ) - if not pattern.search(content): - # Fallback or Warning if structure changed in newer PyApp - console.print("[yellow]Warning:[/] Could not locate app.rs install block to patch.") - return - - replacement = f"std::path::PathBuf::from(\"{install_dir}\")" - updated = pattern.sub(replacement, content, count=1) - app_rs.write_text(updated, encoding="utf-8") - console.print(f"[green]Patched[/] PyApp install dir -> {install_dir}") - - -@click.group(help="Bundle dependencies for offline PyApp.") -def cli() -> None: pass - - -@cli.command("build") -@click.option("--target", required=True, help="Rust target architecture") -@click.option("--requirements", type=click.Path(path_type=Path), required=True) -@click.option("--output", help="Output path (.tar.gz)") -@click.option("--install-dir", help="Custom static installation path (e.g., ~/.myapp)") -@click.option("--pyapp-dir", type=click.Path(path_type=Path), help="Patch PyApp source checkout before build") -def build_bundle(target: str, requirements: Path, output: str | None, install_dir: str | None, pyapp_dir: Path | None) -> None: - """Bundle dependencies and optionally patch PyApp source.""" - - if pyapp_dir and install_dir: - patch_pyapp_install_dir(pyapp_dir, install_dir) - - url = DEFAULT_URLS.get(target) - platform = DEFAULT_PLATFORMS.get(target) - - if not url or not platform: - raise click.ClickException(f"Unsupported target: {target}") - - work_dir = Path(tempfile.mkdtemp(prefix="pyapp-bundler-")) - try: - archive_path = work_dir / "python.tar.gz" - download_with_retry(url, archive_path) - - extract_dir = work_dir / "extracted" - with tarfile.open(archive_path, 
"r:gz") as tar: - tar.extractall(extract_dir) - - python_root = extract_dir / "python" - site_packages = find_site_packages(python_root, target, DEFAULT_PYTHON_VERSION) - - install_requirements(requirements, site_packages, platform, DEFAULT_PYTHON_VERSION) - - out_path = Path(output or f"python-dist-{target}.tar.gz") - with tarfile.open(out_path, "w:gz") as tar: - tar.add(python_root, arcname="python") - - console.print(f"[bold green]Created offline distribution:[/] {out_path}") - finally: - shutil.rmtree(work_dir) - - -if __name__ == "__main__": - cli() diff --git a/plugins/flow/skills/pydantic/SKILL.md b/plugins/flow/skills/pydantic/SKILL.md deleted file mode 100644 index 853311d..0000000 --- a/plugins/flow/skills/pydantic/SKILL.md +++ /dev/null @@ -1,451 +0,0 @@ ---- -name: pydantic -description: "Use when defining Pydantic models, BaseModel, BaseSettings, pydantic_settings, field validators, model validators, serializers, TypeAdapter, settings env config, external data validation, or v1-to-v2 migration." ---- - -# Pydantic Skill - -Pydantic v2 is a high-performance Python data validation library with first-class support for type hints, environment configuration via `BaseSettings`, and a complete rewrite from v1 with significant API changes. 
- -## Code Style Rules - -- Use PEP 604 for unions: `T | None` (not `Optional[T]`) -- Use `model_config = ConfigDict(...)` not inner `class Config` -- Use `model_validate()` for untrusted external input (not `__init__`) -- Use `Annotated[T, Field(...)]` over `Field(...)` as a default value - -## Quick Reference - -### ConfigDict - -```python -from pydantic import BaseModel, ConfigDict - -class MyModel(BaseModel): - model_config = ConfigDict( - from_attributes=True, # replaces orm_mode=True (v1) - extra="forbid", # "allow", "ignore", or "forbid" - validate_assignment=True, # re-validate on attribute set - arbitrary_types_allowed=True, - populate_by_name=True, # allow field name or alias - str_strip_whitespace=True, - frozen=True, # make model immutable - ) -``` - -### Validators - -```python -from pydantic import BaseModel, field_validator, model_validator, computed_field - -class Order(BaseModel): - quantity: int - unit_price: float - discount: float = 0.0 - - @field_validator("quantity") - @classmethod - def quantity_must_be_positive(cls, v: int) -> int: - if v <= 0: - raise ValueError("quantity must be > 0") - return v - - @field_validator("discount") - @classmethod - def discount_in_range(cls, v: float) -> float: - if not (0.0 <= v <= 1.0): - raise ValueError("discount must be between 0 and 1") - return v - - @model_validator(mode="before") - @classmethod - def check_raw_data(cls, data: dict) -> dict: - # runs before individual field validation - if "unit_price" in data and data["unit_price"] < 0: - raise ValueError("unit_price cannot be negative") - return data - - @model_validator(mode="after") - def check_total(self) -> "Order": - # runs after all fields are validated; self is fully populated - if self.quantity * self.unit_price > 1_000_000: - raise ValueError("order total exceeds limit") - return self - - @computed_field - @property - def total(self) -> float: - return self.quantity * self.unit_price * (1 - self.discount) -``` - -### Serialization - 
-```python -model = Order(quantity=2, unit_price=9.99) - -# model_dump options -model.model_dump() -model.model_dump(by_alias=True) # use field aliases in output -model.model_dump(exclude_unset=True) # omit fields not explicitly set -model.model_dump(exclude_none=True) # omit None-valued fields -model.model_dump(include={"quantity"}) # only these fields -model.model_dump(exclude={"discount"}) # all except these fields -model.model_dump(mode="json") # JSON-serializable types - -# JSON string -model.model_dump_json() -model.model_dump_json(by_alias=True, exclude_none=True) - -# Custom field serializer -from pydantic import field_serializer - -class Item(BaseModel): - price: float - - @field_serializer("price") - def serialize_price(self, v: float) -> str: - return f"${v:.2f}" - -# Custom model serializer -from pydantic import model_serializer - -class Item(BaseModel): - name: str - price: float - - @model_serializer - def custom_serialize(self) -> dict: - return {"label": self.name, "cost": self.price} -``` - -### TypeAdapter — Bulk Validation - -```python -from pydantic import TypeAdapter - -# Significantly faster than calling MyModel(...) 
in a loop -adapter = TypeAdapter(list[MyModel]) -items = adapter.validate_python(raw_list) # from Python objects -items = adapter.validate_json(json_string) # from JSON bytes/str - -# Also works for non-model types -int_adapter = TypeAdapter(int) -int_adapter.validate_python("42") # returns 42 -``` - -### Custom Types — Annotated Pattern - -```python -from typing import Annotated -from pydantic import BeforeValidator, PlainSerializer, WithJsonSchema -from pydantic.functional_validators import AfterValidator - -def parse_comma_list(v: str | list) -> list[str]: - if isinstance(v, str): - return [item.strip() for item in v.split(",")] - return v - -def validate_positive(v: int) -> int: - assert v > 0, "must be positive" - return v - -CommaList = Annotated[ - list[str], - BeforeValidator(parse_comma_list), - WithJsonSchema({"type": "string", "description": "Comma-separated values"}), -] - -PositiveInt = Annotated[int, AfterValidator(validate_positive)] - -TagList = Annotated[ - list[str], - BeforeValidator(parse_comma_list), - PlainSerializer(lambda v: ",".join(v), return_type=str), -] -``` - -### BaseSettings — Environment Configuration - -```python -from pydantic_settings import BaseSettings, SettingsConfigDict - -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - env_nested_delimiter="__", # DATABASE__HOST → database.host - case_sensitive=False, - extra="ignore", - ) - - debug: bool = False - secret_key: str - allowed_hosts: list[str] = ["localhost"] - -settings = AppSettings() # reads from env + .env file -``` - -Priority chain: **explicit args > env vars > .env file > defaults** - -See [BaseSettings Deep Reference](references/basesettings.md) for nested models, secrets directory, multiple env files, and custom sources. 
- -### PrivateAttr — Non-Validated Internal State - -```python -from pydantic import BaseModel, PrivateAttr - -class Connection(BaseModel): - host: str - port: int = 5432 - _client: object = PrivateAttr(default=None) - - def connect(self) -> None: - self._client = create_client(self.host, self.port) -``` - -### Constrained Types via Field - -```python -from pydantic import BaseModel, Field - -class Product(BaseModel): - name: str = Field(min_length=1, max_length=100) - price: float = Field(gt=0, le=999_999.99) - stock: int = Field(ge=0) - sku: str = Field(pattern=r"^[A-Z]{3}-\d{4}$") - tags: list[str] = Field(min_length=0, max_length=10) -``` - -### Special Types - -```python -from pydantic import EmailStr, SecretStr, AnyUrl, AnyHttpUrl -from pydantic import PastDate, FutureDate, AwareDatetime, ByteSize - -class UserProfile(BaseModel): - email: EmailStr # validates email format - password: SecretStr # hidden in repr/serialization - website: AnyHttpUrl | None = None - birth_date: PastDate | None = None - appointment: AwareDatetime | None = None # timezone-aware datetime - avatar_size: ByteSize = "5MB" # "5MB", "1GiB", etc. -``` - -### V1 → V2 Migration Quick Reference - -| V1 | V2 | -|----|-----| -| `class Config: orm_mode = True` | `model_config = ConfigDict(from_attributes=True)` | -| `@root_validator` | `@model_validator(mode='before'/'after')` | -| `@validator` | `@field_validator` | -| `.dict()` | `.model_dump()` | -| `.json()` | `.model_dump_json()` | -| `.parse_obj(data)` | `.model_validate(data)` | -| `.parse_raw(json_str)` | `.model_validate_json(json_str)` | -| `__fields__` | `model_fields` | -| `Field(alias=..., allow_population_by_field_name=True)` | `Field(alias=...) + ConfigDict(populate_by_name=True)` | - -See [V1→V2 Migration Guide](references/migration-v1-v2.md) for full mapping, breaking changes, and coexistence patterns. 
- -### Performance - -```python -# Prefer model_validate() for untrusted/external data — skips Python __init__ overhead -obj = MyModel.model_validate(raw_dict) - -# TypeAdapter for batch operations — do NOT use list comprehension with __init__ -adapter = TypeAdapter(list[MyModel]) -items = adapter.validate_python(raw_list) # single validation pass - -# Selective model_dump — avoid serializing the whole model if you only need a few fields -subset = obj.model_dump(include={"id", "name"}) -``` - - - -## Workflow - -### Step 1: Define Models - -Create `BaseModel` subclasses with type-annotated fields. Use `ConfigDict` at the top of each model to set behavior. Prefer `Annotated[T, Field(...)]` over `Field(...)` as default values for reusability. - -### Step 2: Add Validators - -Add `@field_validator` for single-field rules. Use `@model_validator(mode='before')` to preprocess raw input dicts and `@model_validator(mode='after')` for cross-field invariants on the fully constructed model. Add `@computed_field` for derived read-only properties. - -### Step 3: Handle Serialization - -Use `model_dump(exclude_unset=True)` when patching. Use `by_alias=True` when the API consumer expects camelCase or snake_case aliases. Use `@field_serializer` or `@model_serializer` for custom output formats. - -### Step 4: Configure Settings - -Create a `BaseSettings` subclass in `pydantic_settings` for environment-driven config. Set `env_nested_delimiter="__"` to support nested models from flat env vars. Load once at startup and inject as a dependency. - -### Step 5: Validate - -Run `mypy` or `pyright` with the pydantic plugin. Use `TypeAdapter` when validating collections for performance. Run tests to confirm validator error messages are user-friendly. - - - - - -## Guardrails - -- **Use `ConfigDict` not inner `class Config`** -- `class Config` is the V1 pattern and silently ignored in some V2 contexts. 
-- **Use `model_validate()` for untrusted input** -- it is faster and safer than calling `__init__` directly; triggers full validation pipeline. -- **Use `BaseSettings` for environment configuration** -- never read `os.environ` manually in application code; BaseSettings handles type coercion, defaults, and `.env` loading. -- **Use `TypeAdapter` for bulk operations** -- never use a list comprehension calling `MyModel(...)` or `MyModel.model_validate(...)` in a loop; a single `TypeAdapter(list[MyModel]).validate_python(data)` is significantly faster. -- **Never mix V1 and V2 patterns in the same model** -- mixing `@validator` (V1) with `@field_validator` (V2) in the same class causes silent misbehavior or errors. -- **Use `Annotated[T, Field(...)]` over `Field(...)` as a default value** -- the `Annotated` pattern allows reuse as a type alias and avoids mutable default pitfalls. -- **Use `SecretStr` for passwords and tokens** -- prevents accidental leakage in logs, repr, and serialization. -- **Avoid `model_dump(mode='json')` in hot paths** -- use `model_dump_json()` directly when you need a JSON string; it skips the intermediate dict. - - - - - -### Validation Checkpoint - -Before delivering Pydantic code, verify: - -- [ ] `model_config = ConfigDict(...)` used (not inner `class Config`) -- [ ] `model_validate()` used for external/untrusted data (not bare `__init__`) -- [ ] `TypeAdapter` used for bulk list/collection validation -- [ ] No V1 patterns (`@validator`, `@root_validator`, `.dict()`, `.json()`) in new code -- [ ] `BaseSettings` used for environment configuration, not `os.environ` -- [ ] Passwords/tokens use `SecretStr` -- [ ] `Annotated[T, Field(...)]` pattern used for constrained fields -- [ ] `@computed_field` used for derived properties (not plain `@property`) - - - - - -## Example - -**Task:** User registration model with validation + app settings loaded from environment. 
- -```python -from __future__ import annotations # NOT used — pydantic needs runtime type eval - -from typing import Annotated -from pydantic import ( - BaseModel, - ConfigDict, - EmailStr, - Field, - SecretStr, - computed_field, - field_validator, - model_validator, -) -from pydantic_settings import BaseSettings, SettingsConfigDict - - -# --- Reusable constrained type aliases --- -Username = Annotated[str, Field(min_length=3, max_length=32, pattern=r"^\w+$")] -Password = Annotated[SecretStr, Field(min_length=8)] - - -# --- Domain model --- -class UserRegistration(BaseModel): - model_config = ConfigDict(str_strip_whitespace=True) - - username: Username - email: EmailStr - password: Password - confirm_password: Password - - @field_validator("username") - @classmethod - def username_not_reserved(cls, v: str) -> str: - reserved = {"admin", "root", "system"} - if v.lower() in reserved: - raise ValueError(f"'{v}' is a reserved username") - return v - - @model_validator(mode="after") - def passwords_match(self) -> "UserRegistration": - if self.password != self.confirm_password: - raise ValueError("passwords do not match") - return self - - @computed_field - @property - def display_name(self) -> str: - return self.username.capitalize() - - -# --- Settings from environment --- -class DatabaseSettings(BaseModel): - host: str = "localhost" - port: int = 5432 - name: str = "app" - user: str = "postgres" - password: SecretStr = SecretStr("postgres") - - @computed_field - @property - def url(self) -> str: - pwd = self.password.get_secret_value() - return f"postgresql+asyncpg://{self.user}:{pwd}@{self.host}:{self.port}/{self.name}" - - -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - env_file=".env", - env_nested_delimiter="__", - extra="ignore", - ) - - debug: bool = False - secret_key: SecretStr - database: DatabaseSettings = DatabaseSettings() - allowed_hosts: list[str] = ["localhost"] - - -# --- Usage --- -# Validate untrusted input -registration = 
UserRegistration.model_validate({ - "username": "alice", - "email": "alice@example.com", - "password": "s3cure!pw", - "confirm_password": "s3cure!pw", -}) - -# Load settings once at startup -# DATABASE__HOST=db.prod DATABASE__PORT=5433 SECRET_KEY=xyz python app.py -settings = AppSettings() -db_url = settings.database.url -``` - - - ---- - -## References Index - -For detailed guides and configuration examples, refer to the following documents in `references/`: - -- **[BaseSettings Deep Reference](references/basesettings.md)** -- SettingsConfigDict options, nested models, secrets directory, multiple env files, custom sources, and full working example. -- **[V1→V2 Migration Guide](references/migration-v1-v2.md)** -- Full API mapping table, breaking changes, gotchas, and coexistence patterns during incremental migration. - ---- - -## Official References - -- -- -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/pydantic/agents/openai.yaml b/plugins/flow/skills/pydantic/agents/openai.yaml deleted file mode 100644 index 4a499b0..0000000 --- a/plugins/flow/skills/pydantic/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Pydantic" - short_description: "Pydantic v2 models, settings, validators, TypeAdapter, and migrations" diff --git a/plugins/flow/skills/pydantic/references/basesettings.md b/plugins/flow/skills/pydantic/references/basesettings.md deleted file mode 100644 index aca5afb..0000000 --- a/plugins/flow/skills/pydantic/references/basesettings.md +++ /dev/null @@ -1,312 +0,0 @@ -# BaseSettings Deep Reference - -`BaseSettings` (from `pydantic-settings`) extends `BaseModel` with automatic population from environment variables, `.env` files, secrets directories, and custom sources. - -## Installation - -```bash -pip install pydantic-settings -``` - -## SettingsConfigDict Options - -```python -from pydantic_settings import BaseSettings, SettingsConfigDict - -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - # --- .env file loading --- - env_file=".env", # path or tuple of paths (evaluated in order) - env_file_encoding="utf-8", - - # --- Nested model delimiter --- - env_nested_delimiter="__", # DATABASE__HOST → database.host - - # --- Key matching --- - case_sensitive=False, # default: False (env vars are case-insensitive) - env_prefix="APP_", # APP_DEBUG=true → debug=True - - # --- Secrets --- - secrets_dir="/run/secrets", # Docker/K8s secrets volume path - - # --- Extra fields --- - extra="ignore", # "allow", "ignore", or "forbid" - - # --- Validation --- - validate_default=True, # validate default values too - ) -``` - -## Priority Chain - -Values are resolved in this order (highest to lowest priority): - -1. **Explicit constructor arguments**: `AppSettings(debug=True)` -2. **Environment variables**: `export APP_DEBUG=true` -3. **`.env` file(s)**: `.env` or specified paths -4. 
**Secrets directory**: files named after the field -5. **Field defaults**: `debug: bool = False` - -## Nested Models with `__` Delimiter - -```python -from pydantic import BaseModel, SecretStr, computed_field -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class DatabaseSettings(BaseModel): - host: str = "localhost" - port: int = 5432 - name: str = "app" - user: str = "postgres" - password: SecretStr = SecretStr("postgres") - - @computed_field - @property - def url(self) -> str: - pwd = self.password.get_secret_value() - return f"postgresql+asyncpg://{self.user}:{pwd}@{self.host}:{self.port}/{self.name}" - - -class RedisSettings(BaseModel): - host: str = "localhost" - port: int = 6379 - db: int = 0 - password: SecretStr | None = None - - @computed_field - @property - def url(self) -> str: - auth = f":{self.password.get_secret_value()}@" if self.password else "" - return f"redis://{auth}{self.host}:{self.port}/{self.db}" - - -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - env_file=".env", - env_nested_delimiter="__", - ) - - debug: bool = False - secret_key: SecretStr - database: DatabaseSettings = DatabaseSettings() - redis: RedisSettings = RedisSettings() -``` - -With `env_nested_delimiter="__"`, the following env vars map to the nested models: - -```bash -DATABASE__HOST=db.prod.internal -DATABASE__PORT=5433 -DATABASE__NAME=myapp -DATABASE__USER=appuser -DATABASE__PASSWORD=s3cret - -REDIS__HOST=redis.prod.internal -REDIS__PASSWORD=redispass - -SECRET_KEY=my-super-secret-key -DEBUG=false -``` - -## Secrets Directory Pattern - -For Docker and Kubernetes, secrets are often mounted as files. BaseSettings reads them automatically if you set `secrets_dir`. 
- -```python -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - secrets_dir="/run/secrets", - ) - - secret_key: SecretStr # reads /run/secrets/secret_key - database_password: SecretStr # reads /run/secrets/database_password -``` - -File content is read as the field value. For `SecretStr`, the value is wrapped automatically. - -You can also pass the secrets directory at runtime: - -```python -settings = AppSettings(_secrets_dir="/custom/secrets/path") -``` - -## Multiple `.env` Files - -Pass a tuple of paths. Files are loaded in order, and later files take higher priority (each file overrides values from the previous one): - -```python -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - env_file=(".env", ".env.local", ".env.production"), - env_file_encoding="utf-8", - ) -``` - -You can also override the env file at instantiation: - -```python -settings = AppSettings(_env_file=".env.test") -``` - -## Custom Settings Sources - -Implement `PydanticBaseSettingsSource` to add custom backends (e.g., AWS SSM, Vault, remote config): - -```python -from pydantic.fields import FieldInfo -from pydantic_settings import BaseSettings, PydanticBaseSettingsSource - - -class VaultSettingsSource(PydanticBaseSettingsSource): - """Load settings from HashiCorp Vault.""" - - def get_field_value( - self, field: FieldInfo, field_name: str - ) -> tuple[object, str, bool]: - # fetch from Vault here - value = fetch_from_vault(field_name) - return value, field_name, False - - def __call__(self) -> dict[str, object]: - return { - field_name: self.get_field_value(field_info, field_name)[0] - for field_name, field_info in self.settings_cls.model_fields.items() - } - - -class AppSettings(BaseSettings): - secret_key: str - db_password: str - - @classmethod - def settings_customise_sources( - cls, - settings_cls: type[BaseSettings], - init_settings: PydanticBaseSettingsSource, - env_settings: PydanticBaseSettingsSource, - dotenv_settings: PydanticBaseSettingsSource, - file_secret_settings: PydanticBaseSettingsSource, - ) 
-> tuple[PydanticBaseSettingsSource, ...]: - return ( - init_settings, - env_settings, - VaultSettingsSource(settings_cls), # custom source in priority chain - dotenv_settings, - file_secret_settings, - ) -``` - -## Full Working Example - -A complete settings setup for a web application with database, redis, and app-level config: - -```python -from __future__ import annotations - -from functools import lru_cache -from typing import Literal - -from pydantic import AnyHttpUrl, BaseModel, Field, SecretStr, computed_field -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class DatabaseSettings(BaseModel): - host: str = "localhost" - port: int = 5432 - name: str = "app" - user: str = "postgres" - password: SecretStr = SecretStr("postgres") - pool_size: int = Field(default=5, ge=1, le=100) - max_overflow: int = Field(default=10, ge=0, le=50) - - @computed_field - @property - def url(self) -> str: - pwd = self.password.get_secret_value() - return f"postgresql+asyncpg://{self.user}:{pwd}@{self.host}:{self.port}/{self.name}" - - @computed_field - @property - def sync_url(self) -> str: - pwd = self.password.get_secret_value() - return f"postgresql+psycopg2://{self.user}:{pwd}@{self.host}:{self.port}/{self.name}" - - -class RedisSettings(BaseModel): - host: str = "localhost" - port: int = 6379 - db: int = 0 - password: SecretStr | None = None - ssl: bool = False - - @computed_field - @property - def url(self) -> str: - scheme = "rediss" if self.ssl else "redis" - auth = f":{self.password.get_secret_value()}@" if self.password else "" - return f"{scheme}://{auth}{self.host}:{self.port}/{self.db}" - - -class AppSettings(BaseSettings): - model_config = SettingsConfigDict( - env_file=(".env", ".env.local"), - env_file_encoding="utf-8", - env_nested_delimiter="__", - case_sensitive=False, - extra="ignore", - ) - - # App-level - environment: Literal["development", "staging", "production"] = "development" - debug: bool = False - secret_key: SecretStr - 
allowed_hosts: list[str] = ["localhost", "127.0.0.1"] - cors_origins: list[AnyHttpUrl] = [] - - # Nested - database: DatabaseSettings = DatabaseSettings() - redis: RedisSettings = RedisSettings() - - @computed_field - @property - def is_production(self) -> bool: - return self.environment == "production" - - -@lru_cache(maxsize=1) -def get_settings() -> AppSettings: - """Return cached settings instance. Call once at startup.""" - return AppSettings() -``` - -Corresponding `.env` file: - -```dotenv -ENVIRONMENT=production -DEBUG=false -SECRET_KEY=change-me-in-production - -DATABASE__HOST=db.internal -DATABASE__PORT=5432 -DATABASE__NAME=myapp -DATABASE__USER=appuser -DATABASE__PASSWORD=dbpassword -DATABASE__POOL_SIZE=10 -DATABASE__MAX_OVERFLOW=20 - -REDIS__HOST=redis.internal -REDIS__PASSWORD=redispassword -REDIS__SSL=true - -ALLOWED_HOSTS=["myapp.com","www.myapp.com"] -CORS_ORIGINS=["https://myapp.com"] -``` - -## Tips and Gotchas - -- **Lists and dicts from env**: Pydantic parses JSON-formatted strings. `ALLOWED_HOSTS=["a","b"]` works; `ALLOWED_HOSTS=a,b` does not for `list[str]`. -- **`env_prefix` applies to top-level fields only** -- nested model fields still use the `__` delimiter from the nested model's own name, not the prefix. -- **`SecretStr` in nested models**: Pydantic unwraps and re-wraps secret values correctly across nesting levels. -- **Docker Compose**: Use `env_file:` in compose to mirror the `.env` file without `__` flattening -- or pass vars directly and rely on the delimiter. -- **`@lru_cache` for settings**: Wrap `AppSettings()` in an `@lru_cache(maxsize=1)` function to avoid re-reading the filesystem on every access. 
diff --git a/plugins/flow/skills/pydantic/references/migration-v1-v2.md b/plugins/flow/skills/pydantic/references/migration-v1-v2.md deleted file mode 100644 index e3442f0..0000000 --- a/plugins/flow/skills/pydantic/references/migration-v1-v2.md +++ /dev/null @@ -1,277 +0,0 @@ -# Pydantic V1 → V2 Migration Guide - -Pydantic v2 is a ground-up rewrite with a Rust core. Most APIs changed. This guide maps every common V1 pattern to its V2 equivalent and documents breaking changes and gotchas. - -## Full API Mapping Table - -### Model Configuration - -| V1 | V2 | -|----|-----| -| `class Config: orm_mode = True` | `model_config = ConfigDict(from_attributes=True)` | -| `class Config: extra = "forbid"` | `model_config = ConfigDict(extra="forbid")` | -| `class Config: validate_assignment = True` | `model_config = ConfigDict(validate_assignment=True)` | -| `class Config: arbitrary_types_allowed = True` | `model_config = ConfigDict(arbitrary_types_allowed=True)` | -| `class Config: use_enum_values = True` | `model_config = ConfigDict(use_enum_values=True)` | -| `class Config: allow_population_by_field_name = True` | `model_config = ConfigDict(populate_by_name=True)` | -| `class Config: underscore_attrs_are_private = True` | `model_config = ConfigDict(...)` + use `PrivateAttr` explicitly | -| `class Config: schema_extra = {...}` | `model_config = ConfigDict(json_schema_extra={...})` | -| `class Config: alias_generator = ...` | `model_config = ConfigDict(alias_generator=...)` | -| `class Config: fields = {"x": {"alias": "y"}}` | `x: T = Field(alias="y")` | - -### Validators - -| V1 | V2 | -|----|-----| -| `@validator("field")` | `@field_validator("field")` | -| `@validator("field", pre=True)` | `@field_validator("field", mode="before")` | -| `@validator("field", always=True)` | `@field_validator("field")` (always runs in V2) | -| `@validator("field", each_item=True)` | Validate inside the validator manually or use `@field_validator` on items | -| `@root_validator` | 
`@model_validator(mode="before")` or `@model_validator(mode="after")` | -| `@root_validator(pre=True)` | `@model_validator(mode="before")` | -| `@root_validator(pre=False)` / `@root_validator` | `@model_validator(mode="after")` | -| `cls` as first arg in `@validator` | `@classmethod` decorator required + `cls` first arg | -| `values` dict param in `@root_validator` | Instance `self` (mode='after') or raw `data` dict (mode='before') | - -### Serialization - -| V1 | V2 | -|----|-----| -| `.dict()` | `.model_dump()` | -| `.dict(exclude_unset=True)` | `.model_dump(exclude_unset=True)` | -| `.dict(by_alias=True)` | `.model_dump(by_alias=True)` | -| `.json()` | `.model_dump_json()` | -| `.json(by_alias=True)` | `.model_dump_json(by_alias=True)` | -| `.schema()` | `.model_json_schema()` | -| `.schema_json()` | `json.dumps(.model_json_schema())` | - -### Parsing / Instantiation - -| V1 | V2 | -|----|-----| -| `MyModel.parse_obj(data)` | `MyModel.model_validate(data)` | -| `MyModel.parse_raw(json_str)` | `MyModel.model_validate_json(json_str)` | -| `MyModel.parse_file(path)` | Read file + `model_validate_json(content)` | -| `MyModel.from_orm(obj)` | `MyModel.model_validate(obj, from_attributes=True)` or set `ConfigDict(from_attributes=True)` | -| `MyModel.construct(**kwargs)` | `MyModel.model_construct(**kwargs)` | -| `MyModel.copy()` | `MyModel.model_copy()` | -| `MyModel.copy(update={...})` | `MyModel.model_copy(update={...})` | - -### Introspection - -| V1 | V2 | -|----|-----| -| `MyModel.__fields__` | `MyModel.model_fields` | -| `MyModel.__fields_set__` | `instance.model_fields_set` | -| `MyModel.__validators__` | (removed — use `MyModel.__pydantic_validator__`) | -| `MyModel.schema()` | `MyModel.model_json_schema()` | -| `MyModel.__config__` | `MyModel.model_config` | - -### Custom Types - -| V1 | V2 | -|----|-----| -| `class MyType: @classmethod def __get_validators__(cls)` | `Annotated[T, BeforeValidator(...)]` or implement `__get_pydantic_core_schema__` | -| 
`@validator` + `arbitrary_types_allowed` | `Annotated[T, BeforeValidator(fn)]` with `ConfigDict(arbitrary_types_allowed=True)` | - -### BaseSettings (pydantic-settings) - -| V1 (built-in) | V2 (pydantic-settings package) | -|---------------|-------------------------------| -| `from pydantic import BaseSettings` | `from pydantic_settings import BaseSettings` | -| `class Config: env_nested_delimiter = "__"` | `model_config = SettingsConfigDict(env_nested_delimiter="__")` | -| `class Config: env_file = ".env"` | `model_config = SettingsConfigDict(env_file=".env")` | - -**Note**: `pydantic-settings` is a separate package in V2. Install with `pip install pydantic-settings`. - ---- - -## Breaking Changes and Gotchas - -### `@validator` → `@field_validator` requires `@classmethod` - -```python -# V1 — no @classmethod needed -@validator("name") -def validate_name(cls, v): - return v.strip() - -# V2 — @classmethod is required -@field_validator("name") -@classmethod -def validate_name(cls, v: str) -> str: - return v.strip() -``` - -### `@root_validator` mode semantics changed - -```python -# V1 -@root_validator(pre=True) -def check_raw(cls, values): - return values - -@root_validator -def check_after(cls, values): - # values is a dict even in post mode - return values - -# V2 -@model_validator(mode="before") -@classmethod -def check_raw(cls, data: dict) -> dict: - return data - -@model_validator(mode="after") -def check_after(self) -> "MyModel": - # self is the fully constructed model instance - return self -``` - -### `mode="after"` receives instance, not dict - -In V1, `@root_validator` always gave you a dict of `values`. In V2, `mode="after"` gives you the actual model instance (`self`). Access fields as attributes. 
- -### `@validator` `values` param is gone - -V1 validators could access previously-validated fields via the `values` kwarg: - -```python -# V1 -@validator("end_date") -def check_end_after_start(cls, v, values): - if "start_date" in values and v < values["start_date"]: - raise ValueError("end must be after start") - return v -``` - -In V2, use `@model_validator(mode="after")` for cross-field validation instead: - -```python -# V2 -@model_validator(mode="after") -def check_dates(self) -> "MyModel": - if self.end_date < self.start_date: - raise ValueError("end must be after start") - return self -``` - -### `Field` default behavior - -In V1, `Field(alias="x")` alone enabled population by alias. In V2, you must also set `ConfigDict(populate_by_name=True)` if you want to populate by the Python field name too. - -```python -# V2 -class MyModel(BaseModel): - model_config = ConfigDict(populate_by_name=True) - my_field: str = Field(alias="myField") - -MyModel(myField="a") # works -MyModel(my_field="a") # also works (because populate_by_name=True) -``` - -### `model_dump()` returns Python objects, not JSON-safe types - -`model_dump()` may return `datetime`, `UUID`, `Decimal`, etc. Use `model_dump(mode="json")` or `model_dump_json()` when you need JSON-serializable output. - -### `model_construct()` skips validation - -`model_construct()` (V2) and `construct()` (V1) both skip validation. Use only when you know the data is already valid (e.g., reading from a trusted database). - -### `from __future__ import annotations` breaks V2 - -Pydantic v2 relies on runtime type evaluation. `from __future__ import annotations` turns all annotations into strings, breaking validators, DI, and schema generation. Do not use it with Pydantic models. 
- -### Constrained types changed - -```python -# V1 -from pydantic import constr, conint, confloat - -name: constr(min_length=1, max_length=50) -age: conint(gt=0, le=150) - -# V2 — use Field() or Annotated -from pydantic import Field -from typing import Annotated - -name: Annotated[str, Field(min_length=1, max_length=50)] -age: Annotated[int, Field(gt=0, le=150)] -``` - -### `ValidationError` format changed - -V2 `ValidationError.errors()` returns a list of dicts with a new structure. The `loc` field is still a tuple of strings/ints (same as V1), but `type` now uses flat snake_case identifiers (e.g., `"string_too_short"`) instead of V1's dot-separated ones (e.g., `"value_error.any_str.min_length"`). Update any code that pattern-matches on error types. - ---- - -## Coexistence Patterns During Migration - -### Strategy 1: Model-by-Model Migration - -Migrate one model at a time. V1 and V2 models can coexist in the same codebase if they are not mixed within the same inheritance hierarchy. - -```python -# Keep V1 models temporarily -from pydantic.v1 import BaseModel as V1BaseModel - -class LegacyModel(V1BaseModel): # uses V1 semantics - class Config: - orm_mode = True - -# New models use V2 -from pydantic import BaseModel, ConfigDict - -class NewModel(BaseModel): - model_config = ConfigDict(from_attributes=True) -``` - -Pydantic v2 ships a `pydantic.v1` compatibility shim that re-exports the entire V1 API. This lets you migrate files incrementally without breaking existing code. - -### Strategy 2: `pydantic.v1` Shim - -```python -# Before migration — swap import, no other changes needed yet -from pydantic.v1 import BaseModel, validator, root_validator, Field - -class OldModel(BaseModel): - name: str - - @validator("name") - def strip_name(cls, v): - return v.strip() -``` - -This is a stopgap. The V1 shim will be removed in a future version. Use it to unblock upgrades, not as a permanent solution. 
- -### Strategy 3: Adapter Layer - -Wrap V1 model instantiation behind a factory so you can swap implementations: - -```python -def create_user(data: dict) -> UserModel: - return UserModel.model_validate(data) # V2 - # return UserModel.parse_obj(data) # V1 — swap when migrating -``` - ---- - -## Migration Checklist - -- [ ] Replace `from pydantic import BaseSettings` with `from pydantic_settings import BaseSettings` -- [ ] Replace all `class Config` blocks with `model_config = ConfigDict(...)` -- [ ] Replace `orm_mode = True` → `from_attributes=True` -- [ ] Replace `@validator` → `@field_validator` (add `@classmethod`) -- [ ] Replace `@root_validator(pre=True)` → `@model_validator(mode="before")` (add `@classmethod`) -- [ ] Replace `@root_validator` → `@model_validator(mode="after")` (no `@classmethod`; use `self`) -- [ ] Replace `.dict()` → `.model_dump()` -- [ ] Replace `.json()` → `.model_dump_json()` -- [ ] Replace `.parse_obj()` → `.model_validate()` -- [ ] Replace `.parse_raw()` → `.model_validate_json()` -- [ ] Replace `.copy()` → `.model_copy()` -- [ ] Replace `.construct()` → `.model_construct()` -- [ ] Replace `__fields__` → `model_fields` -- [ ] Replace `constr/conint/confloat` → `Annotated[T, Field(...)]` -- [ ] Remove `from __future__ import annotations` from model files -- [ ] Update `ValidationError` error type string matching in tests/error handlers -- [ ] Add `populate_by_name=True` to `ConfigDict` if fields use aliases and need dual-name access diff --git a/plugins/flow/skills/python/SKILL.md b/plugins/flow/skills/python/SKILL.md deleted file mode 100644 index 565b719..0000000 --- a/plugins/flow/skills/python/SKILL.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -name: python -description: "Use when editing Python files, pyproject.toml, requirements.txt, setup.py, setup.cfg, uv workflows, ruff, mypy, typing, packaging, scripts, Cython, or Mypyc extension builds." 
---- - -# Python Skill - -## Overview - -Expert knowledge for Python development in this workspace. This skill aggregates tooling, build systems, and quality standards. - -### Core Standards - -As per workspace rules, the following are **MANDATORY**: - -1. **Tooling**: `uv` is the required tool for Python package and environment management. -2. **Execution**: Always run Python programs with the `uv run` prefix. -3. **Installation**: Use `uv` with `pyproject.toml` and install to a virtual environment. -4. **Typing**: Use Python `>=3.10` typing syntax, with built-in generic types as per PEP 585 (e.g., `dict`, `list` instead of `typing.Dict`, `typing.List`). -5. **Comments**: Prefer docstrings and type annotations over inline comments. Use inline comments only when the logic is non-obvious and cannot be clarified through better naming or type hints. - ---- - - - -## References Index - -For detailed guides on specific tools and sub-systems, refer to the following documents: - -### Package & Project Management - -- **[`uv` Guide](references/uv.md)** - - Initializing projects, managing dependencies, workspaces, and tool execution (`uvx`). - -### Code Quality - -- **[Quality Standards](references/quality.md)** - - Ruff, Mypy, and Pyright configurations. - -### Build Systems & Compiled Extensions - -- **[Building & Packaging](references/build.md)** - - Standalone binaries, patching, and distributing. -- **[Cython Extensions](references/cython.md)** - - Compiling C extensions for performance. -- **[Mypyc Extensions](references/mypyc.md)** - - Mypyc-compatible classes and compilation workflows. 
- - - - - -## Example: pyproject.toml Setup - -```toml -[project] -name = "myapp" -version = "0.1.0" -requires-python = ">=3.12" -dependencies = ["litestar>=2.0", "sqlalchemy>=2.0"] - -[tool.uv] -dev-dependencies = ["pytest>=8.0", "ruff>=0.8", "mypy>=1.13"] - -[tool.ruff] -target-version = "py312" -line-length = 120 - -[tool.ruff.lint] -select = ["ALL"] -ignore = ["D", "ANN101"] - -[tool.mypy] -strict = true -python_version = "3.12" -``` - - - ---- - -## Official References - -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Guardrails - -Add guardrails instructions here. - - - -## Validation - -Add validation instructions here. - diff --git a/plugins/flow/skills/python/agents/openai.yaml b/plugins/flow/skills/python/agents/openai.yaml deleted file mode 100644 index 47c244a..0000000 --- a/plugins/flow/skills/python/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Python" - short_description: "Python packaging, uv, ruff, mypy, pyproject, scripts, and extension builds" diff --git a/plugins/flow/skills/python/references/build.md b/plugins/flow/skills/python/references/build.md deleted file mode 100644 index 83ea0a9..0000000 --- a/plugins/flow/skills/python/references/build.md +++ /dev/null @@ -1,132 +0,0 @@ - -# Python Build Skill - -## Overview - -Modern Python packaging relies on `pyproject.toml` (PEP 621) and build backends (PEP 517). `hatchling` is a popular, modern, extensible build backend. 
- -## Hatchling Configuration - -### Basic `pyproject.toml` Setup - -```toml -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "my-project" -version = "0.1.0" -description = "My awesome project" -readme = "README.md" -requires-python = ">=3.12" -license = "MIT" -authors = [ - { name = "Cody", email = "cody@example.com" }, -] -dependencies = [ - "httpx", -] - -[project.scripts] -my-cli = "my_project.cli:main" -``` - -### Dynamic Versioning - -Use `hatch-vcs` to derive version from Git tags. - -```toml -[build-system] -requires = ["hatchling", "hatch-vcs"] -build-backend = "hatchling.build" - -[tool.hatch.version] -source = "vcs" - -[tool.hatch.build.hooks.vcs] -version-file = "src/my_project/_version.py" -``` - -### Build Targets - -**Wheel (default)**: -Includes everything in the project root defined by packages. - -```toml -[tool.hatch.build.targets.wheel] -packages = ["src/my_project"] -``` - -**Sdist**: -Source distribution. - -```toml -[tool.hatch.build.targets.sdist] -include = [ - "src", - "tests", - "LICENSE", - "README.md", -] -``` - -## Hatch (The Tool) - -Hatch is also a project manager (like `uv`), but `hatchling` (the build backend) is often used with `uv`. - -If using `hatch` for environment management: - -```bash -# Create env -hatch env create - -# Run command -hatch run test -``` - -**Recommendation**: Use `uv` for project/environment management and `hatchling` as the build backend. - -## Other Build Backends - -### Setuptools (Legacy/Standard) - -```toml -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" -``` - -### Flit (Simple) - -Good for pure Python packages with no build steps. 
- -```toml -[build-system] -requires = ["flit_core >=3.2,<4"] -build-backend = "flit_core.buildapi" -``` - -### Poetry - -```toml -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" -``` - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/python/references/cython.md b/plugins/flow/skills/python/references/cython.md deleted file mode 100644 index 0f4339b..0000000 --- a/plugins/flow/skills/python/references/cython.md +++ /dev/null @@ -1,131 +0,0 @@ - -# Cython Optimization Skill - -## Overview - -Cython allows compiling Python-like code to C extensions, offering performance comparable to C. - -## Core Best Practices - -### 1. Static Typing (The 80/20 Rule) - -Type declarations provide the massive speedups. - -```python -# Pure Python mode (recommended for modern code) -import cython - -def f(x: cython.int): - y: cython.double = 0.5 - return x + y - -# .pyx syntax (traditional) -cpdef int f(int x): - cdef double y = 0.5 - return x + y -``` - -### 2. Typed Memoryviews (Fast Array Access) - -Avoid raw pointers. Use typed memoryviews to access NumPy arrays or memory buffers without Python overhead. - -```python -import numpy as np -cimport numpy as np - -# 'double[:]' is a 1D memoryview of doubles -def sum_array(double[:] arr): - cdef int i - cdef double total = 0.0 - # nogil allows multi-threading - with nogil: - for i in range(arr.shape[0]): - total += arr[i] - return total -``` - -### 3. Compiler Directives - -Disable safety checks in hot loops **after** verification. 
- -```python -# cython: boundscheck=False -# cython: wraparound=False -# cython: cdivision=True - -@cython.boundscheck(False) -@cython.wraparound(False) -def fast_loop(int[:] data): - ... -``` - -### 4. Direct C-API Interaction - -Interface directly with C libraries without Python overhead. - -```python -cdef extern from "math.h": - double sin(double x) - -def fast_sin(double x): - return sin(x) -``` - -## Compilation (Modern `pyproject.toml`) - -Use `scikit-build-core` or `meson-python` (or standard `setuptools` with `Cython`). - -**`pyproject.toml` (setuptools approach)**: - -```toml -[build-system] -requires = ["setuptools", "wheel", "Cython", "numpy"] -build-backend = "setuptools.build_meta" -``` - -**`setup.py`**: - -```python -from setuptools import setup, Extension -from Cython.Build import cythonize -import numpy - -extensions = [ - Extension( - "my_module", - ["my_module.pyx"], - include_dirs=[numpy.get_include()], - # Optimization flags - extra_compile_args=["-O3", "-march=native"], - ) -] - -setup( - ext_modules=cythonize(extensions, compiler_directives={"language_level": "3"}) -) -``` - -## Optimization Checklist - -- [ ] Profile first (cProfile, viztracer) -- [ ] Add static types (`cdef type var`) -- [ ] Use `cdef class` (extension types) instead of `class` -- [ ] Replace list/tuple with typed memoryviews (`double[:]`) -- [ ] Release GIL (`with nogil`) for CPU-bound tasks in loops -- [ ] Check `cython -a module.pyx` (yellow lines = Python interaction) - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/python/references/mypyc.md b/plugins/flow/skills/python/references/mypyc.md deleted file mode 100644 index 72705d9..0000000 --- a/plugins/flow/skills/python/references/mypyc.md +++ /dev/null @@ -1,103 +0,0 @@ - -# MyPyC Optimization Skill - -## Overview - -MyPyC compiles standard, type-annotated Python code into C extensions. It is the compiler used by `mypy` itself. - -## Core Optimization Patterns - -### 1. Native Classes & Memory Layout - -MyPyC optimizes native classes significantly better than standard Python classes. - -- **Native Classes**: Defined by simply compiling a class. They use C structures for memory layout. -- **`__slots__`**: Always use `__slots__` to ensure fixed memory layout and faster attribute access. -- **Traits (`@trait`)**: Use `mypy_extensions.trait` for native class multiple inheritance/mixins. Standard multiple inheritance is NOT supported for native classes. - - ```python - from mypy_extensions import trait - - @trait - class Hashable: - def __init__(self) -> None: ... - def hash_value(self) -> int: ... - - class Item(Hashable): ... - ``` - -### 2. Type Annotations & Inference - -Types are not just hints; they are compiled to C types. - -- **Precise Types**: Use `int`, `str`, `float` (native C types). -- **Early Binding**: MyPyC resolves attributes/methods at compile time. Dynamic access (`getattr`) breaks this and is much slower. -- **Annotate External Libraries**: Even if a library isn't compiled, annotating calls to it helps MyPyC generate optimized C code for the call site. - -### 3. High-Performance Idioms - -- **Fast Paths**: Implement checks to skip complex logic (e.g., identity checks before equality). -- **Pre-allocation**: Avoid creating objects in hot loops. Reuse buffers or separate creation from processing. -- **Avoid "Slow" Python Features**: - - **Class Decorators/Metaclasses**: Generally unsupported or slow. - - **Monkey Patching**: Compiled code is immutable. 
You cannot `mock.patch` compiled methods easily. - - **Profiling**: specialized tools required (e.g., `linux-perf` on the binary), `cProfile` often misses C-level details. - -## Limitations & Gotchas - -### 1. Runtime Behavior - -- **Type Enforcement**: Unlike interpreted Python, MyPyC enforces types at runtime. `TypeError` will be raised for violations. `Any` is dangerous. -- **Executability**: Compiled modules must be imported; they cannot be run directly as scripts. -- **Immutability**: Function and class definitions are frozen. - -### 2. Known Issues - -- **`Final` Constants**: Can cause crashes if returned under specific conditions involving `None`. -- **`match` Statements**: Tuple matching implementation may vary from CPython semantics in edge cases. -- **`TYPE_CHECKING` Blocks**: Code inside these blocks is strictly stripped, sometimes leading to "unreachable code" errors if logic depends on it. - -## The "SQLSpec" Pattern - -Best practices derived from `sqlspec` optimizations: - -1. **Strict Type Guards**: Use `isinstance` checks that MyPyC can verify to narrow types in hot paths. -2. **No Dataclasses in Hot Paths**: While supported, manual `__init__` + `__slots__` offers more predictable C-struct generation for performance-critical objects. -3. **No `from __future__ import annotations`**: Stringified annotations can obscure types from the compiler. -4. **Hybrid Inheritance**: If you need interpreted classes to inherit from compiled ones, use `@mypyc_attr(allow_interpreted_subclasses=True)`, but be aware of the performance penalty (vtable lookups become slower). - -## Build Configuration (Hatch) - -```toml -[build-system] -requires = ["hatchling", "hatch-mypyc"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.wheel.hooks.mypyc] -enable-by-default = false -dependencies = ["hatch-mypyc", "mypy_extensions"] -include = ["src/my_package/core"] -options = { opt_level = "3" } -``` - -## Debugging Compilation - -1. 
**Clean MyPy Run**: Ensure `mypy .` passes cleanly. -2. **Strictness**: Use strict mode in mypy configuration to catch `Any` types that degrade performance. -3. **Fallback Check**: If performance is bad, check if the module failed compilation and silently fell back to interpreted mode (check build logs). - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/python/references/quality.md b/plugins/flow/skills/python/references/quality.md deleted file mode 100644 index 2df06a0..0000000 --- a/plugins/flow/skills/python/references/quality.md +++ /dev/null @@ -1,118 +0,0 @@ - -# Python Quality Skill - -## Ruff (Linter & Formatter) - -Ruff is the de-facto standard for Python linting and formatting. - -### Configuration (`pyproject.toml`) - -```toml -[tool.ruff] -# Target Python version -target-version = "py312" -line-length = 100 - -[tool.ruff.lint] -# Enable standard rules -select = [ - "E", # pycodestyle errors - "W", # pycodestyle warnings - "F", # pyflakes - "I", # isort - "B", # flake8-bugbear - "C4", # flake8-comprehensions - "UP", # pyupgrade - "ARG", # flake8-unused-arguments - "SIM", # flake8-simplify - "RUF", # ruff-specific rules -] -ignore = [ - "E501", # Line too long (handled by formatter) -] - -# Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -[tool.ruff.format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = "auto" -``` - -### Usage - -```bash -# Lint -uvx ruff check . 
- -# Lint and fix -uvx ruff check --fix . - -# Format -uvx ruff format . -``` - -## Type Checking (Pyright & Based-Pyright) - -### Pyright - -Fast static type checker from Microsoft. - -**`pyproject.toml`**: - -```toml -[tool.pyright] -include = ["src"] -exclude = ["**/node_modules", "**/__pycache__", ".venv"] -venvPath = "." -venv = ".venv" - -# Type checking strictness -typeCheckingMode = "strict" # or "basic", "standard" -pythonVersion = "3.12" - -# Specific overrides -reportMissingImports = true -reportMissingTypeStubs = false -``` - -### Based-Pyright - -A fork of Pyright with stricter rules and Pylance features (like inlay hints) enabled for all editors. - -**`pyproject.toml`**: - -```toml -[tool.basedpyright] -include = ["src"] -target-version = "py312" - -# Based-pyright specific features -typeCheckingMode = "all" # Enable all rules by default -reportAny = false # Disable "Any" type reporting if too noisy -``` - -## Best Practices - -1. **Run via `uvx`**: `uvx ruff check`, `uvx basedpyright`. -2. **Pre-commit**: Use `pre-commit` to enforce these checks locally. -3. **CI**: Run `ruff check`, `ruff format --check`, and type checking in CI. -4. **Strictness**: Start strict (`typeCheckingMode = "strict"` or `all`) and suppress specific errors rather than starting loose. - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/python/references/uv.md b/plugins/flow/skills/python/references/uv.md deleted file mode 100644 index 54556fc..0000000 --- a/plugins/flow/skills/python/references/uv.md +++ /dev/null @@ -1,186 +0,0 @@ - -# `uv` Skill - -## Overview - -`uv` is an extremely fast Python package and project manager written in Rust. It replaces `pip`, `pip-tools`, `pipx`, `poetry`, `pyenv`, `twine`, and `virtualenv`. - -## Core Capabilities - -### 1. Project Initialization & Management - -```bash -# Initialize a new project (application) -uv init my-app -cd my-app - -# Initialize a library -uv init --lib my-lib - -# specific python version -uv init --python 3.12 -``` - -### 2. Dependency Management - -```bash -# Add dependencies (updates pyproject.toml and creates/updates uv.lock) -uv add requests httpx - -# Add development dependencies -uv add --dev pytest ruff mypy - -# Remove dependencies -uv remove requests - -# Sync environment with lockfile -uv sync -``` - -### 3. Virtual Environment Management - -```bash -# Create a virtual environment -uv venv - -# Activate (standard) -source .venv/bin/activate -``` - -### 4. Running Code & Tools - -`uv` can run scripts and tools in ephemeral environments or the project environment. - -```bash -# Run a script with dependencies (PEP 723) -uv run script.py - -# Run a command in the project environment -uv run python manage.py runserver - -# Run a tool (like pipx) -uvx ruff check -uvx --from "cowsay" cowsay "Hello" -``` - -### 5. Python Version Management - -`uv` manages Python versions automatically. - -```bash -# Install a specific version -uv python install 3.12 - -# Pin a project to a version -uv python pin 3.11 - -# List available versions -uv python list -``` - -### 6. Workspaces - -`uv` supports Cargo-style workspaces for monorepos. 
- -**Root `pyproject.toml`**: - -```toml -[project] -name = "my-workspace" -version = "0.1.0" -requires-python = ">=3.12" -dependencies = [] - -[tool.uv.workspace] -members = ["packages/*", "apps/*"] -``` - -**Child `pyproject.toml` (e.g., `packages/utils`)**: - -```toml -[project] -name = "utils" -version = "0.1.0" -requires-python = ">=3.12" -dependencies = ["httpx"] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" -``` - -## Best Practices - -- **Always use `uv run`**: Avoid manually activating virtual environments. `uv run` ensures the environment is in sync. -- **Lockfile**: Commit `uv.lock` to ensure reproducible builds. -- **Scripts**: Use `script.py` with inline metadata for single-file tools. - - ```python - # /// script - # requires-python = ">=3.12" - # dependencies = [ - # "requests<3", - # "rich", - # ] - # /// - ``` - -- **CI/CD**: `uv` is optimized for caching. Use `uv sync` in CI. - -## Common patterns - -- **Export to requirements.txt**: `uv export --format requirements-txt > requirements.txt` -- **Upgrade all packages**: `uv lock --upgrade` - -## Deployment - -### Package Distribution - -Build and publish wheels leveraging modern endpoints: - -```bash -uv build -uv publish -``` - -Uses secure OIDC-authenticated setups by default simplifying registry trust. - ---- - -## CI/CD Actions - -Example GitHub Actions workflow using official support: - -```yaml -name: Python CI -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v5 - with: - enable-cache: true - cache-dependency-glob: "uv.lock" - - - run: uv sync - - run: uv run pytest tests/ -``` - -## Official References - -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/railway-tools/SKILL.md b/plugins/flow/skills/railway-tools/SKILL.md deleted file mode 100644 index d89e228..0000000 --- a/plugins/flow/skills/railway-tools/SKILL.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -name: railway-tools -description: "Use when deploying to Railway, editing railway.toml, railway.json, Procfile, configuring Railway services, databases, workers, environment variables, or troubleshooting Railway deployments." ---- - -# Railway Deployment (Flow Tools) - - - -## 🚀 Official Railway Skills (Highly Recommended) - -For full project management, deployment automation, and service orchestration, we highly recommend installing the official Railway agent skills: - -- **use-railway**: Master skill for setting up projects, services, and handling deployments. - -**Installation:** - -```bash -npx skills add railwayapp/railway-skills -``` - -## Supplemental Patterns - -The patterns below provide additional context for Flow-specific multi-service architectures and serverless constraints. - -### Serverless / App Sleeping - -Railway's serverless feature puts services to sleep after 10 minutes of no **outbound** traffic. - -**When to disable serverless (`sleepApplication: false`):** - -- Background workers (Celery, SAQ, RQ, Sidekiq) -- Queue processors -- Cron services -- Any service without an HTTP endpoint - -### Multi-Service Architecture (Web + Worker) - -For applications with background task processing, use distinct configuration files (e.g., `railway.app.json` and `railway.worker.json`) to manage different runtime requirements. 
- - - - -## Guardrails - -- **Disable Serverless for Workers:** Always set `sleepApplication: false` for background worker services. -- **Use Variable References:** Prefer `${{Service.VARIABLE}}` syntax (e.g., `${{Postgres.DATABASE_URL}}`) instead of hardcoding values. -- **Dynamic Port Binding:** Never hardcode the port. Always reference `${{PORT}}` for application port injection. - - - -## Validation - -- **Verify `sleepApplication: false` for Workers:** Ensure any background worker has `sleepApplication` explicitly set to `false`. -- **Check PORT Dynamic Reference:** Confirm that start commands and environment variables reference `${{PORT}}`. - - - -## Multi-Service Worker Example - -**Configuration (`railway.worker.json`):** - -```json -{ - "service": { - "name": "worker", - "sleepApplication": false - }, - "deploy": { - "startCommand": "python -m saq my_app.tasks.worker --workers 4" - } -} -``` - - diff --git a/plugins/flow/skills/railway-tools/agents/openai.yaml b/plugins/flow/skills/railway-tools/agents/openai.yaml deleted file mode 100644 index ff376f2..0000000 --- a/plugins/flow/skills/railway-tools/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Railway Tools" - short_description: "Flow supplemental Railway deployment, service, database, and worker patterns" diff --git a/plugins/flow/skills/react/SKILL.md b/plugins/flow/skills/react/SKILL.md deleted file mode 100644 index b5d9af2..0000000 --- a/plugins/flow/skills/react/SKILL.md +++ /dev/null @@ -1,261 +0,0 @@ ---- -name: react -description: "Use when editing React code, .tsx or .jsx files, react imports, components, hooks, state, client components, framework-scoped server components, backend API integration, or React upgrades." 
---- - -# React Framework Skill - - - -## Quick Reference - -### Functional Component Pattern - - - -```tsx -import { useState, useEffect, useCallback } from 'react'; - -interface Props { - title: string; - items: Item[]; - onSelect?: (item: Item) => void; -} - -export function ItemList({ title, items, onSelect }: Props) { - const [selected, setSelected] = useState(null); - - const handleSelect = useCallback((item: Item) => { - setSelected(item); - onSelect?.(item); - }, [onSelect]); - - return ( -
-

{title}

-
    - {items.map(item => ( -
  • handleSelect(item)}> - {item.name} -
  • - ))} -
-
- ); -} -``` - -
- -### Custom Hooks - - - -```tsx -function useFetch(url: string) { - const [data, setData] = useState(null); - const [loading, setLoading] = useState(true); - const [error, setError] = useState(null); - - useEffect(() => { - const controller = new AbortController(); - - fetch(url, { signal: controller.signal }) - .then(res => { - if (!res.ok) throw new Error(`HTTP ${res.status}`); - return res.json(); - }) - .then(setData) - .catch(err => { - if (err.name !== 'AbortError') setError(err); - }) - .finally(() => setLoading(false)); - - return () => controller.abort(); - }, [url]); - - return { data, loading, error }; -} -``` - - - -### React 19+ Server Components (When Applicable) - - - -```tsx -// Server Components are framework-scoped (for example Next.js App Router) -// and are not a universal default in plain React + Vite projects. -async function UserProfile({ userId }: { userId: string }) { - const user = await fetchUser(userId); - return
{user.name}
; -} - -// Client Component -'use client'; -export function InteractiveButton({ onClick }: { onClick: () => void }) { - return ; -} -``` - -
- -### Form Handling - - - -```tsx -import { useActionState } from 'react'; - -function ContactForm() { - const [state, formAction, isPending] = useActionState( - async (prevState: FormState, formData: FormData) => { - const result = await submitForm(formData); - return result; - }, - { message: '' } - ); - - return ( -
- - - {state.message &&

{state.message}

} - - ); -} -``` - -
- -### Context Pattern - - - -```tsx -import { createContext, useContext, useState, ReactNode } from 'react'; - -interface ThemeContextType { - theme: 'light' | 'dark'; - toggle: () => void; -} - -const ThemeContext = createContext(null); - -export function ThemeProvider({ children }: { children: ReactNode }) { - const [theme, setTheme] = useState<'light' | 'dark'>('light'); - const toggle = () => setTheme(t => t === 'light' ? 'dark' : 'light'); - - return ( - - {children} - - ); -} - -export function useTheme() { - const context = useContext(ThemeContext); - if (!context) throw new Error('useTheme must be used within ThemeProvider'); - return context; -} -``` - - - -
- - - -## Best Practices - -- Use TypeScript with strict mode -- Prefer functional components with hooks -- Use `useCallback`/`useMemo` only when profiling shows measurable benefit -- Use `key` props correctly (stable, unique identifiers) -- Handle cleanup in `useEffect` return function -- Use Error Boundaries for error handling - - - -## References Index - -- **[Litestar-Vite Integration](references/litestar_vite.md)** — Backend integration with Litestar-Vite plugin. - -## Related Skills - -For comprehensive coverage of these commonly-used React libraries: - -| Library | Skill | Coverage | -|---------|-------|----------| -| TanStack Router/Query/Table/Form | `tanstack` | Full ecosystem | -| Shadcn/ui components | `shadcn` | All components | -| Tailwind CSS | `tailwind` | Styling patterns | - -## Deployment - -### Static Runtimes - -Bundle traditional SPA apps into static sets: - -```bash -vite build -``` - -### Server and Edge Nodes - -Align Server Actions and components to runtimes offering full Server-Side script continuity safely supporting `'use server'` handlers. - ---- - -## CI/CD Actions - - - -Example GitHub Actions workflow for static build: - -```yaml -name: React CI -on: [push, pull_request] - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Setup Node - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - - - run: npm ci - - run: npm run build -``` - - - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [React](https://github.com/cofin/flow/blob/main/templates/styleguides/frameworks/react.md) -- [TypeScript](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/typescript.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. - - -## Validation - -Add validation instructions here. - diff --git a/plugins/flow/skills/react/agents/openai.yaml b/plugins/flow/skills/react/agents/openai.yaml deleted file mode 100644 index 80ad5eb..0000000 --- a/plugins/flow/skills/react/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "React" - short_description: "React TypeScript components, hooks, state, APIs, and framework-aware patterns" diff --git a/plugins/flow/skills/react/references/litestar_vite.md b/plugins/flow/skills/react/references/litestar_vite.md deleted file mode 100644 index be8ed9e..0000000 --- a/plugins/flow/skills/react/references/litestar_vite.md +++ /dev/null @@ -1,71 +0,0 @@ -# Litestar-Vite Integration - -## Setup with VitePlugin - -```python -# Python backend -from litestar import Litestar -from litestar_vite import ViteConfig, VitePlugin - -vite_config = ViteConfig( - mode="spa", - paths=PathConfig(resource_dir="src"), -) - -app = Litestar(plugins=[VitePlugin(config=vite_config)]) -``` - -```typescript -// vite.config.ts -import { defineConfig } from 'vite'; -import react from '@vitejs/plugin-react'; -import { litestarVitePlugin } from 'litestar-vite-plugin'; - -export default defineConfig({ - plugins: [ - react(), - litestarVitePlugin({ input: ['src/main.tsx'] }), - ], -}); -``` - -## React SPA Integration (e.g. TanStack Router) - -When operating in SPA mode (`mode="spa"`), the entire routing lifecycle is managed on the frontend. 
Ensure your Litestar app maps a catch-all route to serve the `index.html` asset bundle (automatically handled by the VitePlugin in SPA mode) so deep links work locally and in production. - -```tsx -// src/main.tsx -import { StrictMode } from 'react' -import { createRoot } from 'react-dom/client' -import { RouterProvider, createRouter } from '@tanstack/react-router' -import { routeTree } from './routeTree.gen' // Or your manual routes - -const router = createRouter({ routeTree }) - -createRoot(document.getElementById('root')!).render( - - - -) -``` - -## Using Generated Types - -```tsx -import { route } from './generated/routes'; -import type { components } from './generated/schemas'; - -type User = components['schemas']['User']; - -// Type-safe route building -const userUrl = route('users:get', { id: 123 }); -``` - -## CLI Commands - -```bash -litestar assets install # Install deps (NOT npm install) -litestar assets serve # Dev server (NOT npm run dev) -litestar assets build # Production build -litestar assets generate-types # Generate TS types -``` diff --git a/plugins/flow/skills/rust/SKILL.md b/plugins/flow/skills/rust/SKILL.md deleted file mode 100644 index 52ca25e..0000000 --- a/plugins/flow/skills/rust/SKILL.md +++ /dev/null @@ -1,246 +0,0 @@ ---- -name: rust -description: "Use when editing Rust files, .rs, Cargo.toml, Cargo.lock, workspaces, async code, error handling, PyO3, maturin, napi-rs, C ABI, platform support, tests, or performance-critical Rust paths." ---- - -# Rust (Systems & Performance) - -Patterns for multi-crate Rust workspaces targeting cross-platform, high-performance systems with polyglot extension surfaces. Covers workspace layout, async runtimes, platform abstraction, PyO3/maturin Python bindings, napi-rs Node/Bun bindings, C ABI/FFI, error handling, and benchmarking. - -## Code Style - -- Edition 2021, resolver 2. 
-- Workspace-level lint config in root `Cargo.toml`: - -```toml -[workspace.lints.rust] -unexpected_cfgs = { level = "allow", check-cfg = ['cfg(Py_GIL_DISABLED)'] } - -[workspace.lints.clippy] -too_many_arguments = "allow" -type_complexity = "allow" -``` - -- Crates inherit lints: `[lints] workspace = true`. -- Format: `cargo fmt`. Lint: `cargo clippy -- -D warnings`. -- Use `tracing` (not `log`) for structured instrumentation. -- Document public APIs with `///` doc comments. -- Prefer `Arc` over `Rc` in async contexts. - -## Quick Reference - -### Workspace Setup - -```text -project/ -├── Cargo.toml # [workspace] root -├── crates/ -│ ├── core/ # Pure logic, no FFI deps -│ ├── http/ # Runtime + networking (binary) -│ ├── py/ # PyO3 bindings (cdylib) -│ └── node/ # napi-rs bindings -└── rust-toolchain.toml -``` - -Core crate has zero FFI dependencies. Binding crates wrap it. Pin shared dependencies in workspace root with `[workspace.dependencies]`; crates reference with `{ workspace = true }`. - -### Error Handling Pattern (thiserror) - -```rust -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum AppError { - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - #[error("parse error in {path}: {message}")] - Parse { path: String, message: String }, - #[error("not found: {0}")] - NotFound(String), -} - -pub type Result = std::result::Result; -``` - -### Async Tokio Essentials - -- Use `#[tokio::main]` for binaries; pass runtime handle to libraries. -- Select tokio features per crate -- only the server crate needs `"full"`. -- Use `Arc` for shared state across tasks, never `Rc`. -- Use `tokio::sync::Mutex` only when holding the lock across `.await`; otherwise use `parking_lot::Mutex`. 
- -### PyO3 Pattern - -```rust -use pyo3::prelude::*; - -#[pyclass(frozen)] // frozen = immutable, safe across threads -#[derive(Clone, Debug)] -pub struct Config { - #[pyo3(get)] - pub name: String, - #[pyo3(get)] - pub max_retries: u32, -} - -#[pymodule] -#[pyo3(name = "_native")] -pub fn pymodule_init(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} -``` - - - -## Workflow - -### Step 1: Workspace Layout - -Create a workspace with `resolver = "2"`. Separate pure-logic core from binding crates (py, node, c_abi). Pin all shared dependencies in `[workspace.dependencies]`. - -### Step 2: Error Types - -Define per-crate error enums with `thiserror`. Use `#[from]` for automatic conversion. Add `PyErr` conversion (`From for PyErr`) in binding crates. - -### Step 3: Core Logic - -Write business logic in the core crate with no FFI dependencies. Use `async` for I/O-bound work. Test with `cargo test` and benchmark hot paths with `criterion`. - -### Step 4: Bindings - -Wrap core types/functions in binding crates. For PyO3: use `#[pyclass(frozen)]` for immutable data, `future_into_py` for async. For napi-rs: use `#[napi]` macros. - -### Step 5: Validate - -Run `cargo clippy -- -D warnings`, `cargo fmt --check`, and `cargo test --workspace`. For PyO3: `maturin develop` and run Python tests. - - - - - -## Guardrails - -- **Prefer `Arc` over `Rc` in async code** -- `Rc` is not `Send` and will fail to compile in tokio tasks. Use `Arc` for shared ownership across tasks. -- **Use `thiserror` for library error types** -- provides `#[derive(Error)]` with `Display` and `From` impls. Reserve `anyhow` for binaries/scripts only. -- **Workspace for multi-crate projects** -- centralize dependency versions, lint config, and release profiles. Never duplicate version pins across crates. -- **Core crate has zero FFI deps** -- keep PyO3, napi-rs, and libc out of core. Binding crates depend on core and add FFI. 
-- **`#[pyclass(frozen)]` for immutable data** -- enables safe sharing across Python threads without per-access locking. -- **`tracing` over `log`** -- structured instrumentation with spans, levels, and subscriber flexibility. -- **Pin `rust-toolchain.toml`** -- ensures consistent compiler version across CI and local builds. - - - - - -### Validation Checkpoint - -Before delivering Rust code, verify: - -- [ ] Workspace uses `resolver = "2"` and `[workspace.dependencies]` -- [ ] Error types use `thiserror` with `#[from]` conversions -- [ ] Async code uses `Arc` (not `Rc`) for shared state -- [ ] Core crate has no FFI dependencies (PyO3, napi-rs, libc) -- [ ] `cargo clippy -- -D warnings` passes -- [ ] Public APIs have `///` doc comments -- [ ] `rust-toolchain.toml` is present and pinned - - - - - -## Example - -**Task:** Error type and async function with proper error handling. - -```rust -// crates/core/src/error.rs -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum StorageError { - #[error("object not found: {key}")] - NotFound { key: String }, - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - #[error("serialization error: {0}")] - Serde(#[from] serde_json::Error), - #[error("connection timeout after {elapsed_ms}ms")] - Timeout { elapsed_ms: u64 }, -} - -pub type Result = std::result::Result; -``` - -```rust -// crates/core/src/store.rs -use std::sync::Arc; -use tokio::fs; -use crate::error::{Result, StorageError}; - -pub struct ObjectStore { - base_path: Arc, -} - -impl ObjectStore { - pub fn new(base_path: impl Into>) -> Self { - Self { base_path: base_path.into() } - } - - /// Read an object by key, returning its bytes. - pub async fn get(&self, key: &str) -> Result> { - let path = format!("{}/{}", self.base_path, key); - fs::read(&path).await.map_err(|e| match e.kind() { - std::io::ErrorKind::NotFound => StorageError::NotFound { - key: key.to_string(), - }, - _ => StorageError::Io(e), - }) - } - - /// Write bytes to an object key. 
- pub async fn put(&self, key: &str, data: &[u8]) -> Result<()> { - let path = format!("{}/{}", self.base_path, key); - if let Some(parent) = std::path::Path::new(&path).parent() { - fs::create_dir_all(parent).await?; - } - fs::write(&path, data).await?; - Ok(()) - } -} -``` - - - ---- - -## References Index - -For detailed guides and code examples, refer to the following documents in `references/`: - -- **[Workspace Architecture](references/workspace.md)** -- Centralized deps, release profiles, feature flags, module hierarchy. -- **[Async & Concurrency](references/async.md)** -- Tokio patterns, GIL-free async with pyo3_async_runtimes, crossbeam, parking_lot. -- **[PyO3 & Maturin Bindings](references/pyo3.md)** -- Module registration, frozen classes, signature macros, zero-copy, maturin config. -- **[Error Handling](references/errors.md)** -- thiserror 2.0 derive, PyErr conversion, platform-specific errors, From impls. -- **[Platform Abstraction](references/platform.md)** -- Conditional modules per OS, target-specific deps, futex/ulock/WaitOnAddress. -- **[napi-rs Node/Bun Bindings](references/napi.md)** -- Module setup, #[napi] macros, async tasks, TSFN, cross-platform npm distribution. -- **[C ABI & FFI](references/c_abi.md)** -- Stable C ABI, raw pointer patterns, cbindgen, zero-copy for C consumers. -- **[Testing & Benchmarking](references/testing.md)** -- Integration tests, criterion 0.5 benchmarks, CI matrix, maturin develop. - ---- - -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Rust](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/rust.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/rust/agents/openai.yaml b/plugins/flow/skills/rust/agents/openai.yaml deleted file mode 100644 index aa37a0c..0000000 --- a/plugins/flow/skills/rust/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Rust" - short_description: "Rust workspaces, async, FFI, errors, testing, platform, PyO3, and napi-rs" diff --git a/plugins/flow/skills/rust/references/async.md b/plugins/flow/skills/rust/references/async.md deleted file mode 100644 index a171072..0000000 --- a/plugins/flow/skills/rust/references/async.md +++ /dev/null @@ -1,126 +0,0 @@ -# Async & Concurrency - -## Tokio as Standard Runtime - -Select tokio features per crate based on actual needs. Only the server/http crate needs `"full"`: - -```toml -# Core crate — minimal tokio -tokio = { workspace = true } - -# HTTP crate — explicit feature selection -tokio = { version = "1.49.0", features = ["rt-multi-thread", "macros", "net", "sync", "fs", "time", "io-util", "signal"] } -``` - -Use `#[tokio::main]` for binaries; pass runtime handle to libraries. - -## GIL-Free Async: future_into_py - -For async Python functions that return awaitables, use `pyo3_async_runtimes::tokio::future_into_py`. This bridges a Rust future into a Python coroutine: - -```rust -use pyo3::prelude::*; -use pyo3::exceptions::PyRuntimeError; - -fn to_py_err(e: impl std::fmt::Display) -> PyErr { - PyRuntimeError::new_err(format!("Operation error: {e}")) -} - -/// Async read — returns a Python awaitable. 
-#[pyfunction] -pub fn read_object<'py>( - py: Python<'py>, - store: &StorageStore, - path: &str, -) -> PyResult> { - let inner = store.inner().clone(); - let p = StorePath::from(path); - pyo3_async_runtimes::tokio::future_into_py(py, async move { - let result = inner.get(&p).await.map_err(to_py_err)?; - let bytes = result.bytes().await.map_err(to_py_err)?; - Ok(bytes.to_vec()) - }) -} -``` - -## Sync Calls: LazyLock Runtime + py.detach() - -For sync Python functions that must run async Rust code, use a `LazyLock` runtime and `py.detach()` to release the GIL during I/O: - -```rust -use std::sync::LazyLock; - -/// Lazy-initialized shared tokio runtime for sync operations. -static RUNTIME: LazyLock = LazyLock::new(|| { - tokio::runtime::Builder::new_multi_thread() - .worker_threads(2) - .enable_all() - .build() - .expect("Failed to create tokio runtime") -}); - -/// Sync read — releases GIL during I/O. -#[pyfunction] -pub fn read_object_sync( - py: Python<'_>, - store: &StorageStore, - path: &str, -) -> PyResult> { - let inner = store.inner().clone(); - let p = StorePath::from(path); - let bytes = py.detach(|| { - RUNTIME - .block_on(async { inner.get(&p).await?.bytes().await }) - .map_err(to_py_err) - })?; - Ok(PyBytes::new(py, &bytes).unbind()) -} -``` - -For simple blocking operations: - -```rust -/// Sync URL signing — releases GIL during I/O. 
-#[pyfunction] -#[pyo3(signature = (store, path, expires_in_secs = 3600))] -pub fn signed_url_sync( - py: Python<'_>, - store: &StorageStore, - path: &str, - expires_in_secs: u64, -) -> PyResult { - let signer = store.signer().ok_or_else(not_supported)?; - let p = StorePath::from(path); - let dur = Duration::from_secs(expires_in_secs); - py.detach(|| { - RUNTIME - .block_on(async { signer.signed_url(http::Method::GET, &p, dur).await }) - .map(|url| url.to_string()) - .map_err(to_py_err) - }) -} -``` - -## Crossbeam Channels - -Use crossbeam for MPSC/SPSC channels when you need cross-thread communication without async: - -```toml -[dependencies] -crossbeam-channel = { workspace = true } -crossbeam-utils = { workspace = true } -``` - -## Tokio Utilities - -Use `tokio_util` for compatibility layers and helpers: - -```toml -tokio-util = { version = "0.7", features = ["compat"] } -``` - -Key patterns: - -- `tokio::task::spawn_blocking` for CPU-heavy or blocking FFI calls. -- `CancellationToken` for graceful shutdown. -- `tokio::select!` for racing futures. diff --git a/plugins/flow/skills/rust/references/c_abi.md b/plugins/flow/skills/rust/references/c_abi.md deleted file mode 100644 index deb3ef3..0000000 --- a/plugins/flow/skills/rust/references/c_abi.md +++ /dev/null @@ -1,172 +0,0 @@ -# C ABI & FFI - -## Scope - -- Stable C ABI for distributing Rust libraries to C/C++ and other FFI consumers. -- Raw pointer + length patterns for zero-copy data transfer. -- Cross-language error mapping. -- Safe wrappers around unsafe FFI boundaries. - -## Stable ABI Distribution - -Expose Rust functionality through a C-compatible ABI for maximum interoperability: - -```rust -/// Opaque handle to the Rust engine. -pub struct EngineHandle { - inner: Box, -} - -/// Create a new engine. Returns null on error. 
-#[no_mangle] -pub extern "C" fn engine_new(config: *const c_char) -> *mut EngineHandle { - let config = unsafe { - if config.is_null() { return std::ptr::null_mut(); } - match std::ffi::CStr::from_ptr(config).to_str() { - Ok(s) => s, - Err(_) => return std::ptr::null_mut(), - } - }; - match Engine::new(config) { - Ok(engine) => Box::into_raw(Box::new(EngineHandle { inner: Box::new(engine) })), - Err(_) => std::ptr::null_mut(), - } -} - -/// Free the engine. Safe to call with null. -#[no_mangle] -pub extern "C" fn engine_free(handle: *mut EngineHandle) { - if !handle.is_null() { - unsafe { drop(Box::from_raw(handle)); } - } -} -``` - -### Header Generation - -Use `cbindgen` to auto-generate C headers: - -```toml -# cbindgen.toml -language = "C" -include_guard = "MY_ENGINE_H" -autogen_warning = "/* Auto-generated by cbindgen. Do not edit. */" - -[export] -include = ["EngineHandle"] -``` - -```bash -cbindgen --config cbindgen.toml --crate my-engine --output include/my_engine.h -``` - -## Raw Pointer + Length Patterns - -### Passing Buffers to C - -```rust -/// Process a buffer. Caller owns input; callee allocates output. -/// Returns length of output, or -1 on error. -#[no_mangle] -pub extern "C" fn engine_process( - handle: *const EngineHandle, - input_ptr: *const u8, - input_len: usize, - output_ptr: *mut *mut u8, - output_len: *mut usize, -) -> i32 { - let handle = unsafe { - if handle.is_null() { return -1; } - &*handle - }; - let input = unsafe { std::slice::from_raw_parts(input_ptr, input_len) }; - - match handle.inner.process(input) { - Ok(result) => { - let mut boxed = result.into_boxed_slice(); - unsafe { - *output_len = boxed.len(); - *output_ptr = boxed.as_mut_ptr(); - } - std::mem::forget(boxed); - 0 - } - Err(_) => -1, - } -} - -/// Free a buffer allocated by the engine. 
-#[no_mangle] -pub extern "C" fn engine_free_buffer(ptr: *mut u8, len: usize) { - if !ptr.is_null() { - unsafe { drop(Vec::from_raw_parts(ptr, len, len)); } - } -} -``` - -### Zero-Copy Strategies (C) - -| Mechanism | When | -|-----------|------| -| Raw pointer + length | Stable ABI consumers | -| Shared memory (mmap) | Large data, multiple processes | -| Borrowed slices | Short-lived reads within a single call | - -**Rule:** Avoid copying large buffers across FFI. Document ownership clearly -- who allocates, who frees. Always provide a corresponding `_free` function for every allocation. - -## Cross-Language Error Mapping - -### Error Codes - -Use integer error codes for C consumers: - -```rust -#[repr(C)] -pub enum EngineErrorCode { - Ok = 0, - InvalidConfig = 1, - IoError = 2, - Timeout = 3, - Unknown = -1, -} - -/// Get the last error message. Returns null if no error. -/// The returned string is valid until the next API call on this handle. -#[no_mangle] -pub extern "C" fn engine_last_error(handle: *const EngineHandle) -> *const c_char { - let handle = unsafe { - if handle.is_null() { return std::ptr::null(); } - &*handle - }; - match &handle.inner.last_error { - Some(msg) => msg.as_ptr() as *const c_char, - None => std::ptr::null(), - } -} -``` - -### Mapping from CoreError - -```rust -impl From for EngineErrorCode { - fn from(e: CoreError) -> Self { - match e { - CoreError::Config(_) => EngineErrorCode::InvalidConfig, - CoreError::Io(_) => EngineErrorCode::IoError, - CoreError::Timeout(_) => EngineErrorCode::Timeout, - _ => EngineErrorCode::Unknown, - } - } -} -``` - -## Conventions - -- Use `#[no_mangle]` and `extern "C"` for all exported functions. -- Prefix all exported symbols with a namespace (e.g., `engine_`). -- All pointer parameters must be documented for nullability. -- Every allocation function needs a corresponding free function. -- Use opaque handles (pointers to Rust structs) rather than exposing struct layouts. 
-- Test with `valgrind` or AddressSanitizer to catch memory issues at FFI boundaries. -- Use `cbindgen` to generate headers -- never hand-write them. -- Core crate: no `unsafe` if possible, no FFI deps. Keep unsafe confined to the binding crate. diff --git a/plugins/flow/skills/rust/references/errors.md b/plugins/flow/skills/rust/references/errors.md deleted file mode 100644 index fa50d76..0000000 --- a/plugins/flow/skills/rust/references/errors.md +++ /dev/null @@ -1,108 +0,0 @@ -# Error Handling - -## thiserror 2.0 Derive Pattern - -Use `thiserror` for library errors. Define domain-specific error enums per crate: - -```rust -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum SyncError { - #[error("platform wait is not implemented")] - Unsupported, - #[error("system call failed: {0}")] - Syscall(std::io::Error), -} -``` - -```rust -#[derive(Debug, Error)] -pub enum ShmError { - #[error("invalid shared memory size")] - InvalidSize, - #[error("invalid shared memory name")] - InvalidName, - #[error("system call failed: {0}")] - Sys(#[from] std::io::Error), - #[error("invalid shared memory name: {0}")] - Name(#[from] std::ffi::NulError), - #[error("shared memory mapping failed")] - MapFailed, -} -``` - -For tool/application crates with structured variants: - -```rust -#[derive(Debug, Error)] -pub enum BundlerError { - #[error("IO error: {0}")] - Io(#[from] std::io::Error), - #[error("Parse error in {path}: {message}")] - Parse { path: String, message: String }, - #[error("Transform error: {0}")] - Transform(String), - #[error("Pool exhausted - all workers busy")] - PoolExhausted, -} - -pub type Result<T> = std::result::Result<T, BundlerError>; -``` - -## PyErr Conversion - -Convert Rust errors to Python exceptions.
Two patterns: - -### From impl (for crate-level errors) - -```rust -use pyo3::exceptions::PyRuntimeError; -use pyo3::PyErr; - -impl From<BundlerError> for PyErr { - fn from(err: BundlerError) -> PyErr { - PyRuntimeError::new_err(err.to_string()) - } -} -``` - -### to_py_err helper (for ad-hoc conversions) - -```rust -fn to_py_err(e: impl std::fmt::Display) -> PyErr { - PyRuntimeError::new_err(format!("Operation error: {e}")) -} - -// Usage in async contexts: -pyo3_async_runtimes::tokio::future_into_py(py, async move { - let result = inner.get(&path).await.map_err(to_py_err)?; - Ok(result.bytes().await.map_err(to_py_err)?.to_vec()) -}) -``` - -## From Impls for Library Error Wrapping - -Use `#[from]` for automatic conversion from upstream errors: - -```rust -#[derive(Debug, Error)] -pub enum CoreError { - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), - #[error("serialization error: {0}")] - Serde(#[from] serde_json::Error), - #[error("sync error: {0}")] - Sync(#[from] SyncError), - #[error("shared memory error: {0}")] - Shm(#[from] ShmError), -} -``` - -## Rules - -- Use `thiserror` for library errors; `anyhow`/`eyre` only in binaries. -- Never panic across FFI boundaries. Catch and convert to PyErr. -- Define domain-specific error enums per crate, not one global enum. -- Use `#[from]` for automatic upstream error wrapping. -- Provide `type Result<T> = std::result::Result<T, MyError>` aliases.
diff --git a/plugins/flow/skills/rust/references/napi.md b/plugins/flow/skills/rust/references/napi.md deleted file mode 100644 index 98a4772..0000000 --- a/plugins/flow/skills/rust/references/napi.md +++ /dev/null @@ -1,311 +0,0 @@ -# napi-rs (Node/Bun Bindings) - -## Module Setup - -### Cargo.toml - -```toml -[lib] -crate-type = ["cdylib"] - -[dependencies] -napi = { version = "2", features = ["tokio_rt", "serde-json"] } -napi-derive = "2" -my-core = { path = "../core" } - -[build-dependencies] -napi-build = "2" -``` - -### build.rs - -```rust -fn main() { - napi_build::setup(); -} -``` - -### Basic Export - -```rust -use napi_derive::napi; - -#[napi] -pub fn add(a: u32, b: u32) -> u32 { - a + b -} - -#[napi] -pub struct Engine { - inner: core::Engine, -} - -#[napi] -impl Engine { - #[napi(constructor)] - pub fn new(config: String) -> napi::Result { - let inner = core::Engine::new(&config) - .map_err(|e| napi::Error::from_reason(e.to_string()))?; - Ok(Self { inner }) - } - - #[napi] - pub fn process_sync(&self, data: Buffer) -> napi::Result { - let result = self.inner.process(&data) - .map_err(|e| napi::Error::from_reason(e.to_string()))?; - Ok(result.into()) - } -} -``` - -## Async Tasks - -### With Tokio Runtime - -Enable `tokio_rt` feature for built-in Tokio integration: - -```rust -#[napi] -impl Engine { - #[napi] - pub async fn fetch(&self, url: String) -> napi::Result { - // Runs on Tokio runtime, doesn't block JS event loop - let response = self.inner.fetch(&url).await - .map_err(|e| napi::Error::from_reason(e.to_string()))?; - Ok(response.body().into()) - } -} -``` - -### Manual Async Task - -For fine-grained control over task execution: - -```rust -use napi::{Task, Env, JsNumber}; - -struct ComputeTask { - input: Vec, -} - -impl Task for ComputeTask { - type Output = usize; - type JsValue = JsNumber; - - fn compute(&mut self) -> napi::Result { - // Runs on libuv thread pool — off the main JS thread - Ok(heavy_computation(&self.input)) - } - - fn 
resolve(&mut self, env: Env, output: Self::Output) -> napi::Result { - env.create_uint32(output as u32) - } -} -``` - -## ThreadsafeFunction (TSFN) - -Call JavaScript callbacks from any Rust thread: - -```rust -use napi::threadsafe_function::{ThreadsafeFunction, ThreadSafeCallContext}; - -#[napi] -pub fn start_worker(callback: ThreadsafeFunction) { - std::thread::spawn(move || { - loop { - let message = receive_message(); - callback.call(Ok(message), napi::threadsafe_function::ThreadsafeFunctionCallMode::NonBlocking); - } - }); -} -``` - -**Rules:** - -- Always use `NonBlocking` unless you need backpressure. -- TSFN prevents Node from exiting -- call `unref()` if the callback is optional. -- Clone TSFN to share across threads (it's `Send + Sync`). - -## Buffer Handling - -### Zero-Copy Input - -```rust -#[napi] -pub fn process(data: Buffer) -> napi::Result { - // Buffer provides &[u8] access without copying - let slice: &[u8] = &data; - let result = transform(slice); - Ok(result.into()) -} -``` - -### Typed Arrays - -```rust -#[napi] -pub fn sum_float64(arr: Float64Array) -> f64 { - arr.iter().sum() -} -``` - -### Large Data -- External Buffer - -For data owned by Rust that JS needs to read: - -```rust -#[napi] -pub fn create_large_buffer(env: Env) -> napi::Result { - let data: Vec = generate_large_data(); - // JS gets a view; Rust owns the memory - unsafe { - env.create_buffer_with_borrowed_data( - data.as_ptr(), - data.len(), - data, - |data, _hint| drop(data), - ) - } -} -``` - -### Zero-Copy Strategies (Node) - -| Mechanism | When | -|-----------|------| -| `napi::Buffer` | Binary data transfer | -| `SharedArrayBuffer` | Web workers / threads | - -**Rule:** Avoid copying large buffers across FFI. Use views/slices when lifetime is clear; copy small data (<4KB) for simplicity. 
- -## Error Mapping - -```rust -use napi::Error; - -#[derive(Debug, thiserror::Error)] -pub enum EngineError { - #[error("config error: {0}")] - Config(String), - #[error("timeout: {0:?}")] - Timeout(std::time::Duration), -} - -impl From<EngineError> for napi::Error { - fn from(e: EngineError) -> napi::Error { - Error::from_reason(e.to_string()) - } -} -``` - -### Cross-Language Error Mapping (Core to Node) - -Map core errors to JavaScript Error objects: - -```rust -impl From<CoreError> for napi::Error { - fn from(e: CoreError) -> napi::Error { - napi::Error::from_reason(e.to_string()) - } -} -``` - -## deno_core Embedding - -For embedding V8/TypeScript execution in Rust: - -```rust -use deno_core::{JsRuntime, RuntimeOptions, op2}; - -#[op2] -#[string] -fn op_greet(#[string] name: String) -> String { - format!("Hello, {name}!") -} - -deno_core::extension!( - my_ext, - ops = [op_greet], -); - -fn create_runtime() -> JsRuntime { - JsRuntime::new(RuntimeOptions { - extensions: vec![my_ext::init_ops()], - ..Default::default() - }) -} -``` - -### Key deno_core Patterns - -- Use `#[op2]` (v2 op macro) for type-safe op registration. -- Use `extension!` macro to bundle ops into loadable extensions. -- `#[serde]` parameter attribute for complex types via serde. -- `OpState` for shared mutable state across ops. -- Prefer `#[string]` over manual v8 string conversion. - -## Bun Compatibility - -- Target **Node-API** (not Node.js-specific APIs) for Bun support. -- Avoid `node:` built-in imports in JavaScript wrapper code. -- Keep the native surface minimal -- complex logic in Rust, thin JS wrapper. -- Test with both `node` and `bun` in CI.
- -## Package Distribution - -### package.json - -```json -{ - "name": "@scope/my-package", - "main": "index.js", - "types": "index.d.ts", - "napi": { - "name": "my-package", - "triples": { - "defaults": true, - "additional": [ - "aarch64-apple-darwin", - "aarch64-unknown-linux-gnu", - "aarch64-unknown-linux-musl" - ] - } - }, - "scripts": { - "build": "napi build --release --platform", - "prepublishOnly": "napi prepublish -t npm" - } -} -``` - -### TypeScript Definitions - -napi-rs auto-generates `.d.ts` files. Verify they're included in the package. - -## Testing - -- **Rust:** `cargo test` for core logic. -- **Node:** `vitest` or `jest` for JS API surface. -- **Bun:** `bun test` for Bun compatibility. -- **Async:** Test that async operations don't block the event loop. -- **TSFN:** Test callback invocation from background threads. -- **Memory:** Use `--expose-gc` + `process.memoryUsage()` for leak detection. - -## Conventions - -- Use `#[napi]` macro -- avoid manual `napi::Env` calls when possible. -- Map Rust errors to JS errors consistently via `From` impl. -- Never block the JS event loop -- use async tasks or spawn threads. -- Provide `.d.ts` type definitions for all exports. -- Use `Buffer` for binary data, not `Vec` (avoids copy). -- Thin bindings: binding crates only do type conversion and API surface shaping. - -## Official References - -- -- -- -- -- -- diff --git a/plugins/flow/skills/rust/references/platform.md b/plugins/flow/skills/rust/references/platform.md deleted file mode 100644 index 27f81aa..0000000 --- a/plugins/flow/skills/rust/references/platform.md +++ /dev/null @@ -1,224 +0,0 @@ -# Platform Abstraction - -## Conditional Modules - -Use `#[cfg(target_os = "...")]` to compile platform-specific modules. Provide a common interface via re-exports: - -```rust -//! platform/mod.rs — Platform-specific wait/wake primitives. 
- -#[cfg(target_os = "linux")] -pub mod linux; -#[cfg(target_os = "macos")] -pub mod macos; -#[cfg(windows)] -pub mod windows; - -#[cfg(target_os = "linux")] -pub(crate) use linux::{wait_on_address, wait_on_address_timeout, wake_all, wake_one}; -#[cfg(target_os = "macos")] -pub(crate) use macos::{wait_on_address, wait_on_address_timeout, wake_all, wake_one}; -#[cfg(windows)] -pub(crate) use windows::{wait_on_address, wait_on_address_timeout, wake_all, wake_one}; - -// Unsupported platform fallback -#[cfg(not(any(target_os = "linux", target_os = "macos", windows)))] -pub(crate) fn wait_on_address( - _word: &core::sync::atomic::AtomicU32, - _expected: u32, -) -> Result<(), crate::sync::SyncError> { - Err(crate::sync::SyncError::Unsupported) -} -``` - -## Target-Specific Dependencies - -Use `[target]` sections in Cargo.toml for OS-specific deps: - -```toml -[target.'cfg(unix)'.dependencies] -libc = { workspace = true } -rustix = { workspace = true, features = ["fs"] } - -[target.'cfg(windows)'.dependencies] -windows-sys = { workspace = true } -``` - -## Linux: futex Syscalls - -Use `libc::SYS_futex` for wait/wake. Always check for `EAGAIN`/`EINTR`: - -```rust -use core::sync::atomic::AtomicU32; -use std::io; -use crate::sync::SyncError; - -pub(crate) fn wait_on_address(word: &AtomicU32, expected: u32) -> Result<(), SyncError> { - let addr = word as *const AtomicU32 as *const u32; - // SAFETY: futex syscall on a valid aligned u32 address. - let res = unsafe { - libc::syscall( - libc::SYS_futex, - addr, - libc::FUTEX_WAIT, - expected, - std::ptr::null::(), - ) - } as i64; - if res == 0 { - return Ok(()); - } - let err = io::Error::last_os_error(); - match err.raw_os_error() { - Some(libc::EAGAIN) | Some(libc::EINTR) => Ok(()), - _ => Err(SyncError::Syscall(err)), - } -} - -pub(crate) fn wake_one(word: &AtomicU32) -> Result<(), SyncError> { - let addr = word as *const AtomicU32 as *const u32; - // SAFETY: futex wake on a valid aligned u32 address. 
- let res = unsafe { libc::syscall(libc::SYS_futex, addr, libc::FUTEX_WAKE, 1) } as i64; - if res >= 0 { Ok(()) } - else { Err(SyncError::Syscall(io::Error::last_os_error())) } -} -``` - -Linux-specific syscalls like `pidfd_open`: - -```rust -use std::os::fd::{FromRawFd, OwnedFd}; - -/// Open a process file descriptor for monitoring process lifetime. -/// Available on Linux 5.3+ (kernel `pidfd_open(2)`). -pub fn pidfd_open(pid: i32) -> io::Result { - let fd = unsafe { libc::syscall(libc::SYS_pidfd_open, pid, 0) } as i32; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - // SAFETY: pidfd_open returns a valid FD on success. - Ok(unsafe { OwnedFd::from_raw_fd(fd) }) -} -``` - -## macOS: ulock (Feature-Gated) - -Feature-gate ulock behind `macos-ulock`. Default to spin+nanosleep fallback: - -```rust -//! macOS-specific primitives (ulock fast-path, spin+sleep fallback). - -use core::sync::atomic::{AtomicU32, Ordering}; -use std::time::{Duration, Instant}; -use crate::sync::SyncError; - -const SPIN_ITERS: usize = 1024; -const SLEEP_NS: i64 = 1_000_000; // 1ms - -pub(crate) fn wait_on_address(word: &AtomicU32, expected: u32) -> Result<(), SyncError> { - #[cfg(feature = "macos-ulock")] - { return ulock_wait(word, expected); } - spin_sleep_wait(word, expected) -} - -fn spin_sleep_wait(word: &AtomicU32, expected: u32) -> Result<(), SyncError> { - let mut spins = 0usize; - loop { - if word.load(Ordering::Acquire) != expected { - return Ok(()); - } - if spins < SPIN_ITERS { - spins += 1; - std::hint::spin_loop(); - continue; - } - let ts = libc::timespec { tv_sec: 0, tv_nsec: SLEEP_NS }; - unsafe { libc::nanosleep(&ts, std::ptr::null_mut()); } - } -} - -#[cfg(feature = "macos-ulock")] -extern "C" { - fn __ulock_wait(operation: u32, addr: *const u32, value: u64, timeout: u32) -> i32; - fn __ulock_wake(operation: u32, addr: *const u32, wake_value: u64) -> i32; -} -``` - -## Windows: WaitOnAddress - -Use `windows-sys` for kernel wait/wake primitives: - -```rust -use 
windows_sys::Win32::Foundation::{GetLastError, ERROR_TIMEOUT}; -use windows_sys::Win32::System::Threading::{WaitOnAddress, WakeByAddressSingle, WakeByAddressAll}; - -pub(crate) fn wait_on_address(word: &AtomicU32, expected: u32) -> Result<(), SyncError> { - let expected_val = expected; - // SAFETY: WaitOnAddress on a valid aligned u32 address. - let res = unsafe { - WaitOnAddress( - word as *const AtomicU32 as *const core::ffi::c_void, - &expected_val as *const u32 as *const core::ffi::c_void, - core::mem::size_of::(), - u32::MAX, - ) - }; - if res == 0 { - return Err(SyncError::Syscall(io::Error::from_raw_os_error( - unsafe { GetLastError() } as i32, - ))); - } - Ok(()) -} - -pub(crate) fn wake_one(word: &AtomicU32) -> Result<(), SyncError> { - unsafe { WakeByAddressSingle(word as *const AtomicU32 as *const core::ffi::c_void) }; - Ok(()) -} -``` - -## Shared Memory (Cross-Platform) - -Use platform handles with RAII cleanup: - -```rust -#[cfg(unix)] -pub type ShmHandle = std::os::fd::RawFd; -#[cfg(windows)] -pub type ShmHandle = std::os::windows::io::RawHandle; - -pub struct ShmRegion { - ptr: NonNull, - len: usize, - handle: ShmHandle, - owns_handle: bool, - owns_mapping: bool, -} - -impl Drop for ShmRegion { - fn drop(&mut self) { - unsafe { - #[cfg(unix)] - { - if self.owns_mapping { libc::munmap(self.ptr.as_ptr().cast(), self.len); } - if self.owns_handle { libc::close(self.handle); } - } - #[cfg(windows)] - { - use windows_sys::Win32::Foundation::CloseHandle; - use windows_sys::Win32::System::Memory::UnmapViewOfFile; - if self.owns_mapping { UnmapViewOfFile(self.ptr.as_ptr().cast()); } - if self.owns_handle { CloseHandle(self.handle); } - } - } - } -} -``` - -## Unsafe Discipline - -- Document every `unsafe` block with a `// SAFETY:` comment. -- Isolate platform-specific unsafe in `platform/` modules behind safe wrappers. -- Prefer `rustix` over raw `libc` for POSIX syscalls when available (e.g., `rustix::fs::flock`). -- Specify atomic `Ordering` explicitly. 
Never default to `SeqCst` without justification. -- Use RAII wrappers for OS handles (fd, mmap, socket). diff --git a/plugins/flow/skills/rust/references/pyo3.md b/plugins/flow/skills/rust/references/pyo3.md deleted file mode 100644 index 4f7d05c..0000000 --- a/plugins/flow/skills/rust/references/pyo3.md +++ /dev/null @@ -1,436 +0,0 @@ -# PyO3 & Maturin Bindings - -## Module Registration - -Register the native extension module with `#[pymodule]`. Add classes and functions explicitly: - -```rust -use pyo3::prelude::*; - -/// Whether this build targets free-threaded Python (3.14t+). -#[cfg(Py_GIL_DISABLED)] -pub const FREE_THREADED: bool = true; -#[cfg(not(Py_GIL_DISABLED))] -pub const FREE_THREADED: bool = false; - -/// The `_native` extension module. -#[pymodule] -#[pyo3(name = "_native")] -pub fn pymodule_init(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add("__version__", env!("CARGO_PKG_VERSION"))?; - m.add("FREE_THREADED", FREE_THREADED)?; - - // Types - m.add_class::()?; - m.add_class::()?; - - // Functions - m.add_function(wrap_pyfunction!(get_batch, m)?)?; - m.add_function(wrap_pyfunction!(submit_results, m)?)?; - - Ok(()) -} -``` - -For modules that never need the GIL, use `gil_used = false`: - -```rust -#[pymodule(gil_used = false)] -fn _http(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add("__free_threaded__", FREE_THREADED)?; - m.add_function(wrap_pyfunction!(serve, m)?)?; - m.add_class::()?; - Ok(()) -} -``` - -## Frozen Classes - -Use `#[pyclass(frozen)]` for immutable config/data classes. 
This allows safe sharing across threads: - -```rust -#[pyclass(frozen, from_py_object)] -#[derive(Clone, Debug)] -pub struct ObjectMeta { - #[pyo3(get)] - pub path: String, - #[pyo3(get)] - pub size: u64, - #[pyo3(get)] - pub last_modified: String, - #[pyo3(get)] - pub etag: Option, -} - -#[pymethods] -impl ObjectMeta { - fn __repr__(&self) -> String { - format!("ObjectMeta(path={:?}, size={})", self.path, self.size) - } -} -``` - -## Signature Macros - -Use `#[pyo3(signature = (...))]` for keyword-only args and defaults: - -```rust -#[pymethods] -impl PyRequest { - #[new] - #[pyo3(signature = (method, path, query, headers, body, has_body, route_id=None, path_params=None))] - fn new( - py: Python<'_>, - method: String, - path: String, - query: String, - headers: Vec<(String, String)>, - body: &[u8], - has_body: bool, - route_id: Option, - path_params: Option>, - ) -> Self { - Self { - method, path, query, - body: PyBytes::new(py, body).unbind(), - has_body, route_id, path_params, headers, - } - } -} -``` - -For functions with many keyword-only args: - -```rust -#[pyfunction] -#[pyo3(signature = (store, path, data, *, content_type=None))] -pub fn put_object( - py: Python<'_>, - store: &StorageStore, - path: &str, - data: &[u8], - content_type: Option<&str>, -) -> PyResult { /* ... */ } -``` - -## Zero-Copy Batch Access with Arc - -Wrap shared data in `Arc` so Python proxies share the underlying data without copying: - -```rust -use std::sync::Arc; - -/// Vectorized container for a sealed request batch. -/// O(1) GIL time instead of O(N) for building a Python list. 
-#[pyclass] -pub struct RequestBatch { - pub(crate) snapshot: Arc, -} - -#[pymethods] -impl RequestBatch { - pub fn __len__(&self) -> usize { - self.snapshot.len() - } - - pub fn __getitem__(&self, index: usize) -> PyResult { - if index >= self.snapshot.len() { - return Err(pyo3::exceptions::PyIndexError::new_err( - "batch index out of range", - )); - } - Ok(RequestProxy::new(Arc::clone(&self.snapshot), index)) - } - - pub fn __iter__(slf: PyRef<'_, Self>) -> PyResult> { - let iter = RequestBatchIter { - snapshot: Arc::clone(&slf.snapshot), - index: 0, - }; - Py::new(slf.py(), iter) - } -} -``` - -## Free-Threaded Python Detection - -Detect free-threaded Python (3.14t+) at compile time via `Py_GIL_DISABLED`: - -```rust -#[cfg(Py_GIL_DISABLED)] -pub(crate) const FREE_THREADED: bool = true; -#[cfg(not(Py_GIL_DISABLED))] -pub(crate) const FREE_THREADED: bool = false; -``` - -Allow the cfg in workspace lints: - -```toml -[workspace.lints.rust] -unexpected_cfgs = { level = "allow", check-cfg = ['cfg(Py_GIL_DISABLED)'] } -``` - -Use conditional logic based on GIL status: - -```rust -#[cfg(Py_GIL_DISABLED)] -{ - // Free-threaded path: multiple event loops, no GIL contention -} -#[cfg(not(Py_GIL_DISABLED))] -{ - // GIL path: single event loop, use py.detach() for I/O -} -``` - -## build.rs with pyo3_build_config - -The binding crate needs `pyo3-build-config` to detect Python configuration at build time: - -```toml -# crates/py/Cargo.toml -[build-dependencies] -pyo3-build-config = { version = "0.28.2", features = ["resolve-config"] } -``` - -```rust -// build.rs -fn main() { - pyo3_build_config::use_pyo3_cfgs(); -} -``` - -## Binding Crate Cargo.toml - -The binding crate produces both cdylib (for maturin) and rlib (for embedding): - -```toml -[package] -name = "project-py" -version.workspace = true -edition.workspace = true - -[dependencies] -project-core = { path = "../core" } -pyo3 = { workspace = true } - -[build-dependencies] -pyo3-build-config = { version = "0.28.2", 
features = ["resolve-config"] } - -[features] -default = [] -extension-module = ["pyo3/extension-module"] - -[lib] -name = "_project" -crate-type = ["cdylib", "rlib"] -``` - -## Maturin pyproject.toml Config - -```toml -[build-system] -requires = ["maturin>=1.5,<2"] -build-backend = "maturin" - -[tool.maturin] -python-source = "src/py" -module-name = "mypackage._native" -manifest-path = "src/rs/project-py/Cargo.toml" -``` - -For standalone crates with `package.metadata.maturin`: - -```toml -# Inside Cargo.toml -[package.metadata.maturin] -python-source = "../../../src/py" -module-name = "mypackage._http" -bindings = "pyo3" -``` - -## GIL Management - -### Release GIL for CPU Work - -Always release the GIL when doing CPU-bound Rust work: - -```rust -#[pymethods] -impl MyClass { - fn compute(&self, py: Python<'_>) -> PyResult> { - let data = self.inner.clone(); - py.allow_threads(move || { - // GIL released — Python threads can run - Ok(data.process()) - }) - } -} -``` - -### When to Hold the GIL - -- Calling Python objects or APIs (`PyDict`, `PyList`, callbacks). -- Accessing `Python<'_>` token for type conversions. -- Creating new Python objects. 
- -## Buffer Protocol & Zero-Copy - -### Exposing Rust Data to Python - -Use `PyBuffer` for zero-copy access to contiguous Rust data: - -```rust -#[pymethods] -impl RingBuffer { - fn read_into<'py>(&self, py: Python<'py>) -> PyResult> { - let data = self.inner.read()?; - // Zero-copy: creates PyBytes pointing to Rust data - Ok(PyBytes::new(py, &data)) - } -} -``` - -### Accepting Python Buffers - -```rust -#[pyfunction] -fn process_buffer(py: Python<'_>, buf: PyBuffer) -> PyResult { - // Access contiguous memory without copying - let slice = buf.as_slice(py)?; - py.allow_threads(|| Ok(compute_on_slice(slice))) -} -``` - -### memoryview for Large Data - -```rust -#[pymethods] -impl SharedMemoryRegion { - fn as_memoryview<'py>(&self, py: Python<'py>) -> PyResult> { - // SAFETY: Region outlives the memoryview (enforced by Python ref to self) - unsafe { - let ptr = self.inner.as_ptr(); - let len = self.inner.len(); - PyMemoryView::from_raw_parts(py, ptr, len) - } - } -} -``` - -### Zero-Copy Strategies (Python) - -| Mechanism | When | -|-----------|------| -| `PyBuffer` / `memoryview` | Large contiguous data | -| `PyBytes::new(py, &data)` | Immutable byte data | - -**Rule:** Avoid copying large buffers across FFI. Use views/slices when lifetime is clear; copy small data (<4KB) for simplicity. 
- -## Error Mapping - -Map Rust errors to Python exceptions deterministically: - -```rust -use pyo3::exceptions::{PyValueError, PyRuntimeError, PyIOError}; - -impl From<CoreError> for PyErr { - fn from(err: CoreError) -> PyErr { - match err { - CoreError::Config(msg) => PyValueError::new_err(msg), - CoreError::Io(e) => PyIOError::new_err(e.to_string()), - CoreError::Timeout(d) => PyRuntimeError::new_err( - format!("operation timed out after {d:?}") - ), - } - } -} -``` - -## Async Bridging - -Bridge Tokio futures to Python async: - -```rust -use pyo3_async_runtimes::tokio::future_into_py; - -#[pymethods] -impl AsyncClient { - fn fetch<'py>(&self, py: Python<'py>, url: String) -> PyResult<Bound<'py, PyAny>> { - let client = self.inner.clone(); - future_into_py(py, async move { - let resp = client.get(&url).await.map_err(CoreError::from)?; - Ok(resp.body().to_vec()) - }) - } -} -``` - -## Type Stubs (.pyi) - -Provide `.pyi` files for IDE autocompletion and mypy: - -```python -# my_package/_core.pyi -from typing import Optional - -__version__: str - -class MyClass: - def __init__(self, capacity: int) -> None: ... - def compute(self) -> bytes: ... - async def fetch(self, url: str) -> bytes: ... - -def process_buffer(buf: bytes | bytearray | memoryview) -> int: ... -``` - -## Cross-Platform Wheels (cibuildwheel) - -```toml -# pyproject.toml -[tool.cibuildwheel] -before-all = "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y" -environment = { PATH = "$HOME/.cargo/bin:$PATH" } -skip = ["pp*", "*-musllinux_i686"] -``` - -## Project Layout - -```text -project/ -├── Cargo.toml -├── pyproject.toml -├── src/lib.rs # Rust source -├── python/ -│ └── my_package/ -│ ├── __init__.py # Re-exports from _core -│ ├── _core.pyi # Type stubs -│ └── py.typed # PEP 561 marker -└── tests/ - └── test_bindings.py # Python-side tests -``` - -## Testing - -- **Rust side:** `cargo test` with `rlib` crate type. -- **Python side:** `pytest` with `maturin develop` for dev builds. 
-- **Integration:** Test GIL release under threading (`concurrent.futures.ThreadPoolExecutor`). -- **Memory:** Use `tracemalloc` to verify zero-copy patterns aren't leaking. - -## Conventions - -- Name Rust modules with `_` prefix: `_core`, `_engine`. -- Always add `__version__` from `CARGO_PKG_VERSION`. -- Use `Bound<'py, T>` (not `&T`) for PyO3 0.22+ API. -- Prefer `abi3` when targeting multiple Python versions without recompilation. -- Document Python-visible APIs in both docstrings and `.pyi` stubs. -- Use `extension-module` feature only in cdylib crates, never in the core. - -## Official References - -- -- -- -- -- -- diff --git a/plugins/flow/skills/rust/references/testing.md b/plugins/flow/skills/rust/references/testing.md deleted file mode 100644 index 7e746b5..0000000 --- a/plugins/flow/skills/rust/references/testing.md +++ /dev/null @@ -1,178 +0,0 @@ -# Testing & Benchmarking - -## Integration Tests - -Place integration tests in `tests/` directory. Use environment variables for multi-process test coordination: - -```rust -// tests/test_shm.rs -use project_core::shm::{ShmError, ShmRegion}; -use std::io; -use std::process::Command; - -#[test] -fn shm_multi_process_roundtrip() { - // Child process path - if std::env::var("TEST_SHM_CHILD").is_ok() { - let name = std::env::var("TEST_SHM_NAME").unwrap(); - let size: usize = std::env::var("TEST_SHM_SIZE").unwrap().parse().unwrap(); - let region = ShmRegion::open_named(&name, size).unwrap(); - unsafe { assert_eq!(region.as_slice()[0], 42); } - return; - } - - // Parent process - let name = format!("/test-shm-{}", std::process::id()); - let mut region = match ShmRegion::create_named(&name, 4096) { - Ok(region) => region, - Err(ShmError::Sys(err)) if err.kind() == io::ErrorKind::PermissionDenied => return, - Err(err) => panic!("create_named failed: {err:?}"), - }; - unsafe { region.as_mut_slice()[0] = 42; } - - let exe = std::env::current_exe().unwrap(); - let status = Command::new(&exe) - .env("TEST_SHM_CHILD", 
"1") - .env("TEST_SHM_NAME", &name) - .env("TEST_SHM_SIZE", "4096") - .arg("--exact") - .arg("shm_multi_process_roundtrip") - .arg("--nocapture") - .status() - .unwrap(); - assert!(status.success()); -} -``` - -## Property-Based Testing with proptest - -Use `proptest` for invariant testing: - -```rust -use proptest::prelude::*; - -proptest! { - #[test] - fn long_names_are_rejected(len in 256usize..512) { - let name = format!("/{}", "a".repeat(len)); - let err = ShmRegion::create_named(&name, 1024).unwrap_err(); - prop_assert!(matches!(err, ShmError::InvalidName | ShmError::Name(_))); - } -} -``` - -## Criterion 0.5 Benchmarks - -Define benchmarks in `benches/` with `harness = false`: - -```toml -# Cargo.toml -[dev-dependencies] -criterion = { workspace = true } - -[[bench]] -name = "spsc" -harness = false - -[[bench]] -name = "codec" -harness = false -required-features = ["compression"] -``` - -Basic benchmark: - -```rust -// benches/spsc.rs -use criterion::{criterion_group, criterion_main, Criterion}; -use std::sync::Arc; -use project_core::buffer::{ChannelMode, RingLayout}; -use project_core::channel::SpscRing; -use project_core::shm::ShmRegion; - -fn bench_spsc_ping_pong(c: &mut Criterion) { - let layout = RingLayout::new(256, 1024, ChannelMode::Spsc); - let alloc = std::alloc::Layout::from_size_align( - layout.total_size, 64, - ).unwrap(); - let ptr = unsafe { std::alloc::alloc(alloc) }; - let region = Arc::new(unsafe { ShmRegion::from_raw(ptr, layout.total_size) }); - let ring = unsafe { SpscRing::initialize(Arc::clone(&region), layout, 0, 0).unwrap() }; - - let payload = [7u8; 8]; - let mut out = [0u8; 8]; - - c.bench_function("spsc_ping_pong", |b| { - b.iter(|| { - ring.send(&payload, 0).unwrap(); - let _ = ring.recv(&mut out).unwrap(); - }) - }); - - unsafe { std::alloc::dealloc(ptr, alloc); } -} - -criterion_group!(benches, bench_spsc_ping_pong); -criterion_main!(benches); -``` - -Parameterized benchmarks with `BenchmarkId`: - -```rust -use 
criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; - -fn bench_wakeup_latency(c: &mut Criterion) { - let mut group = c.benchmark_group("recv_hybrid_wakeup"); - - for delay_us in [10, 50, 100] { - group.bench_with_input( - BenchmarkId::from_parameter(delay_us), - &delay_us, - |b, &delay_us| { - // setup ring, spawn sender with delay, measure recv latency - b.iter(|| { /* ... */ }); - }, - ); - } - group.finish(); -} -``` - -## CI Matrix - -Test across Python versions and platform features: - -```yaml -# CI considerations -# - Python 3.12, 3.13, 3.14t (free-threaded) -# - Cross-platform: linux, macos, windows -# - Feature combinations: default, compression, free-threading -``` - -Build for testing with maturin: - -```bash -# Development build (fast iteration) -maturin develop --uv --release - -# With specific features -maturin develop --uv --release --features compression - -# Run Rust tests -cargo test --workspace - -# Run benchmarks -cargo bench --bench spsc -cargo bench --bench codec --features compression -``` - -## Additional Tools - -- `cargo nextest run` for parallel test execution. -- `cargo +nightly miri test` for unsafe code verification. -- `cargo-llvm-cov` for coverage. 
-- Address sanitizer for binding layers: - -```bash -RUSTFLAGS="-Zsanitizer=address" cargo +nightly test -Zbuild-std --target x86_64-unknown-linux-gnu -``` diff --git a/plugins/flow/skills/rust/references/workspace.md b/plugins/flow/skills/rust/references/workspace.md deleted file mode 100644 index e13b284..0000000 --- a/plugins/flow/skills/rust/references/workspace.md +++ /dev/null @@ -1,145 +0,0 @@ -# Workspace Architecture - -## Directory Layout - -Structure workspaces by concern with a pure-logic core crate and separate binding crates: - -```text -project/ -├── Cargo.toml # [workspace] root -├── crates/ -│ ├── core/ # Pure logic, no FFI deps -│ │ ├── src/lib.rs -│ │ └── Cargo.toml -│ ├── http/ # Runtime + networking (binary or cdylib) -│ │ └── Cargo.toml # depends on core -│ ├── py/ # PyO3 bindings -│ │ └── Cargo.toml # cdylib + rlib, depends on core -│ └── bundler/ # Optional tool crate -│ └── Cargo.toml -└── rust-toolchain.toml -``` - -Core crate has zero FFI dependencies. Binding crates wrap it. - -## Centralized Dependencies - -Pin shared dependency versions once in the workspace root. 
Crates reference with `{ workspace = true }`: - -```toml -# Root Cargo.toml -[workspace] -resolver = "2" -members = ["crates/*"] - -[workspace.package] -version = "0.1.0" -edition = "2021" -authors = ["Project Contributors"] -license = "MIT" - -[workspace.dependencies] -arrow = "53.0" -criterion = { version = "0.5", features = ["async_tokio"] } -crossbeam-channel = "0.5" -crossbeam-utils = "0.8" -libc = "0.2" -proptest = "1.6" -rustix = "0.38" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "2.0" -tokio = { version = "1.35", features = ["full"] } -pyo3 = { version = "0.28.2" } -windows-sys = { version = "0.52", features = ["Win32_System_Memory", "Win32_System_Threading"] } -``` - -Individual crates reference workspace deps: - -```toml -# crates/core/Cargo.toml -[package] -name = "project-core" -version.workspace = true -edition.workspace = true - -[dependencies] -crossbeam-channel = { workspace = true } -serde = { workspace = true } -thiserror = { workspace = true } -tokio = { workspace = true } -lz4_flex = { workspace = true, optional = true } -zstd = { workspace = true, optional = true } - -[target.'cfg(unix)'.dependencies] -libc = { workspace = true } -rustix = { workspace = true, features = ["fs"] } - -[target.'cfg(windows)'.dependencies] -windows-sys = { workspace = true } - -[dev-dependencies] -criterion = { workspace = true } -proptest = { workspace = true } - -[lints] -workspace = true -``` - -## Release Profile - -Optimize for maximum performance in release builds: - -```toml -[profile.release] -lto = true -codegen-units = 1 -opt-level = 3 -``` - -## Feature Flags - -Use `dep:name` syntax for conditional dependencies. 
Group features logically: - -```toml -[features] -default = ["compression"] -compression = ["dep:lz4_flex", "dep:zstd"] -dev-proxy = ["dep:reqwest"] -js-runtime = ["dep:project-js"] -grpc = ["dep:tonic", "dep:prost", "dep:tonic-health", "dep:tonic-reflection"] -free-threading = [] -``` - -Benchmarks can require features: - -```toml -[[bench]] -name = "codec" -harness = false -required-features = ["compression"] -``` - -## Module Hierarchy - -Organize the core crate lib.rs with public re-exports for ergonomics: - -```rust -//! Core IPC primitives and shared types. - -pub const VERSION: &str = env!("CARGO_PKG_VERSION"); - -pub mod async_ring; -pub mod batch; -pub mod buffer; -pub mod channel; -pub mod platform; -pub mod protocol; -pub mod shm; -pub mod sync; -pub mod transport; - -// Re-exports for ergonomics -pub use batch::{BatchArena, BatchError, BatchNode}; -pub use transport::{BatchSnapshot, TransportError, TransportNode}; -``` diff --git a/plugins/flow/skills/saq/SKILL.md b/plugins/flow/skills/saq/SKILL.md deleted file mode 100644 index 2845b7b..0000000 --- a/plugins/flow/skills/saq/SKILL.md +++ /dev/null @@ -1,340 +0,0 @@ ---- -name: saq -description: "Use when editing SAQ task queues, saq imports, background jobs, async workers, enqueueing jobs, CronJob schedules, queue configuration, worker lifecycle, or async-native task processing." ---- - -# SAQ (Simple Async Queue) Skill - -SAQ is a lightweight async task queue built on asyncio. Supports Redis and Postgres backends. Designed for simplicity with async-native patterns — no separate broker process, no class-based tasks, just plain async functions. 
- -## Code Style Rules - -- Use PEP 604 for unions: `T | None` (not `Optional[T]`) -- **Never** use `from __future__ import annotations` -- Use Google-style docstrings -- All task functions must be `async def` -- First argument of every task function is always the context dict (`ctx`) - -## Quick Reference - -### Queue Creation - -```python -from saq import Queue - -# Redis backend -queue = Queue.from_url("redis://localhost") - -# Postgres backend -queue = Queue.from_url("postgresql+asyncpg://user:pass@localhost/db") -``` - -### Task Definition - -```python -async def send_email(ctx: dict, *, recipient: str, subject: str, body: str) -> None: - """Send an email as a background task. - - Args: - ctx: SAQ context dict (contains queue, job, and custom startup keys). - recipient: Email recipient address. - subject: Email subject line. - body: Email body content. - """ - mailer = ctx["mailer"] # injected via startup hook - await mailer.send(recipient, subject, body) -``` - -### Enqueueing Jobs - -```python -# Fire and forget -await queue.enqueue("send_email", recipient="user@example.com", subject="Hello", body="World") - -# Enqueue and wait for result -result = await queue.apply("send_email", recipient="user@example.com", subject="Hello", body="World") - -# With job options -await queue.enqueue( - "send_email", - recipient="user@example.com", - subject="Hello", - body="World", - timeout=30, - retries=3, - ttl=3600, - key="email-user@example.com", # deduplication key -) -``` - -### CronJob Scheduling - -```python -from saq import CronJob - -# Run at the top of every hour -hourly_report = CronJob( - function=generate_report, - cron="0 * * * *", - timeout=300, -) - -# Run every 15 minutes -health_check = CronJob( - function=check_health, - cron="*/15 * * * *", - timeout=60, - retries=1, -) -``` - -### Worker Setup - -```python -from saq import Worker - -worker = Worker( - queue, - functions=[send_email, process_order, generate_report], - cron_jobs=[hourly_report, 
health_check], - concurrency=10, - startup=startup_hook, - shutdown=shutdown_hook, - before_process=before_process_hook, - after_process=after_process_hook, -) - -# Run the worker (blocks) -import asyncio -asyncio.run(worker.start()) -``` - -### Job Options Reference - -| Option | Type | Default | Description | -|---|---|---|---| -| `timeout` | `int` | `None` | Seconds before job times out. **Always set this.** | -| `retries` | `int` | `0` | Number of retry attempts on failure | -| `ttl` | `int` | `600` | Seconds to retain result after completion | -| `key` | `str` | `None` | Deduplication key — skip if a job with this key is already queued/active | -| `heartbeat` | `int` | `0` | Seconds between heartbeat updates (use for long-running jobs) | -| `scheduled` | `int` | `0` | Unix timestamp to delay job start | - -### Job Lifecycle - -```text -queued → active → complete - → failed - → aborted -``` - -### Context Dict - -The `ctx` dict passed to every task contains: - -- `ctx["queue"]` — the `Queue` instance -- `ctx["job"]` — the current `Job` object -- Any keys added by your `startup` hook (e.g., `ctx["db"]`, `ctx["mailer"]`) - - - -## Workflow - -### Step 1: Define Task Functions - -Write `async def` functions with `ctx: dict` as the first positional arg and all task parameters as keyword-only args (after `*`). Keep task functions focused — each task does one thing. - -### Step 2: Configure the Queue - -Create a `Queue` using `Queue.from_url()` with your Redis or Postgres DSN. Store the queue instance where it can be shared across your app (module-level, app state, or DI container). - -### Step 3: Define Lifecycle Hooks - -Write `startup` and `shutdown` hooks to initialize and clean up shared resources (DB pools, HTTP clients, mailers). Attach resources to `ctx` in `startup` so all tasks can access them. - -### Step 4: Schedule CronJobs - -Wrap any recurring work in `CronJob` instances with explicit cron expressions and timeouts. 
Do not use external cron tools (crontab, Kubernetes CronJob) for work that belongs in the queue. - -### Step 5: Create and Run Worker - -Instantiate `Worker` with the queue, task functions, cron jobs, concurrency limit, and lifecycle hooks. Run with `asyncio.run(worker.start())` or integrate into your process manager. - -### Step 6: Enqueue from Application Code - -Call `queue.enqueue()` for fire-and-forget or `queue.apply()` when you need the result. Use the `key` parameter for natural deduplication (e.g., per-user jobs that should not stack). - - - - - -## Guardrails - -- **Always set `timeout`** — the default is no timeout. A hung task will block a worker slot forever. -- **Use `heartbeat` for long-running jobs** — without heartbeat, SAQ may mark a long-active job as stuck and re-queue it. Set heartbeat to roughly 1/3 of expected runtime. -- **Use `CronJob` for scheduled work** — do not schedule SAQ tasks from external cron tools. CronJobs are managed by the worker and participate in the job lifecycle (retries, timeouts, observability). -- **First arg is always `ctx`** — SAQ injects the context dict as the first positional argument. Keyword-only task params come after `*`. -- **Handle graceful shutdown** — call `await worker.stop()` on SIGTERM/SIGINT. Abrupt process kills can leave jobs stranded in `active` state. -- **Use `key` for deduplication** — if the same logical job can be enqueued multiple times (e.g., per-user sync), set a stable `key` to prevent stacking. -- **Set appropriate `concurrency`** — default is 10. Lower for CPU/memory-intensive tasks, higher for I/O-bound tasks. Consider backend connection pool sizes. -- **Do not share mutable state between tasks** — use the context dict (populated per-worker in `startup`) for shared resources like DB pools and HTTP clients. 
- - - - - -### Validation Checkpoint - -Before delivering SAQ code, verify: - -- [ ] Every task function is `async def` with `ctx: dict` as the first positional arg -- [ ] All task parameters are keyword-only (defined after `*`) -- [ ] `timeout` is set on all long-running jobs and `CronJob` definitions -- [ ] `heartbeat` is set for jobs that run longer than ~30 seconds -- [ ] Shared resources (DB, HTTP client) are initialized in `startup` hook and attached to `ctx` -- [ ] `CronJob` is used for scheduled/recurring work (not external cron) -- [ ] `key` is used where job deduplication is needed -- [ ] Worker handles graceful shutdown - - - - - -## Example - -**Task:** Background email sender with startup hook, cron health check, and deduplication. - -```python -import asyncio -from saq import CronJob, Queue, Worker - - -# --- Shared queue (module-level) --- -queue = Queue.from_url("redis://localhost") - - -# --- Lifecycle hooks --- -async def startup(ctx: dict) -> None: - """Initialize shared resources and attach to context.""" - # Example: async HTTP client for sending email - import httpx - ctx["http"] = httpx.AsyncClient() - - -async def shutdown(ctx: dict) -> None: - """Clean up shared resources.""" - await ctx["http"].aclose() - - -# --- Task definitions --- -async def send_welcome_email(ctx: dict, *, user_id: int, email: str) -> None: - """Send a welcome email to a new user. - - Args: - ctx: SAQ context dict. - user_id: ID of the new user. - email: Recipient email address. - """ - http: httpx.AsyncClient = ctx["http"] - await http.post( - "https://api.email-provider.com/send", - json={"to": email, "template": "welcome", "user_id": user_id}, - ) - - -async def process_export(ctx: dict, *, export_id: int) -> dict: - """Process a data export job. - - Args: - ctx: SAQ context dict. - export_id: ID of the export record to process. - - Returns: - Dict with export result metadata. 
- """ - # Long-running — heartbeat prevents SAQ from marking it stuck - job = ctx["job"] - # ... processing logic ... - return {"export_id": export_id, "rows": 1000} - - -async def check_queue_health(ctx: dict) -> None: - """Scheduled health check — logs queue stats.""" - q: Queue = ctx["queue"] - info = await q.info() - print(f"Queue stats: {info}") - - -# --- CronJob --- -health_check = CronJob( - function=check_queue_health, - cron="*/5 * * * *", - timeout=30, -) - - -# --- Worker --- -worker = Worker( - queue, - functions=[send_welcome_email, process_export], - cron_jobs=[health_check], - concurrency=10, - startup=startup, - shutdown=shutdown, -) - - -# --- Enqueueing from application code --- -async def on_user_created(user_id: int, email: str) -> None: - await queue.enqueue( - "send_welcome_email", - user_id=user_id, - email=email, - timeout=30, - retries=2, - key=f"welcome-{user_id}", # deduplicate: only one welcome email per user - ) - - -async def start_export(export_id: int) -> None: - await queue.enqueue( - "process_export", - export_id=export_id, - timeout=600, - heartbeat=120, # update heartbeat every 2 minutes - key=f"export-{export_id}", - ) - - -if __name__ == "__main__": - asyncio.run(worker.start()) -``` - - - ---- - -## References Index - -For detailed guides and patterns, refer to the following documents in `references/`: - -- **[Advanced Patterns](references/patterns.md)** -- Heartbeat management, dead letter handling, job chaining, queue priorities, worker lifecycle hooks, Postgres backend. - ---- - -## Official References - -- -- -- - -## Cross-References - -- For Litestar integration (SAQPlugin, DI, web UI, CLI): see `flow:litestar` → litestar-saq section. - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. 
-- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/saq/agents/openai.yaml b/plugins/flow/skills/saq/agents/openai.yaml deleted file mode 100644 index aae7e45..0000000 --- a/plugins/flow/skills/saq/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "SAQ" - short_description: "SAQ async tasks, queues, cron jobs, workers, enqueueing, and lifecycle patterns" diff --git a/plugins/flow/skills/saq/references/patterns.md b/plugins/flow/skills/saq/references/patterns.md deleted file mode 100644 index 6feeecf..0000000 --- a/plugins/flow/skills/saq/references/patterns.md +++ /dev/null @@ -1,363 +0,0 @@ -# SAQ Advanced Patterns - -## Heartbeat Management - -SAQ uses heartbeats to detect stuck jobs. When a job is `active`, the worker periodically updates a heartbeat timestamp. If the timestamp goes stale (beyond the `heartbeat` interval), SAQ considers the job stuck and may re-queue it. - -**Rule of thumb:** set `heartbeat` to ~1/3 of expected job duration. 
- -```python -# A job expected to run ~10 minutes -await queue.enqueue( - "process_large_file", - file_id=42, - timeout=700, # 700s hard timeout - heartbeat=200, # update heartbeat every ~3 minutes -) -``` - -For tasks where duration is variable, manually trigger heartbeat updates from within the task: - -```python -async def process_large_file(ctx: dict, *, file_id: int) -> None: - job = ctx["job"] - queue: Queue = ctx["queue"] - - for chunk in read_chunks(file_id): - await process_chunk(chunk) - # Manually extend the heartbeat after each chunk - await queue.update(job, heartbeat=time.time()) -``` - -## @monitored_job Decorator Pattern - -A reusable decorator that auto-calculates and refreshes heartbeat intervals for long-running tasks: - -```python -import asyncio -import functools -import time -from collections.abc import Callable -from typing import Any - -from saq import Queue - - -def monitored_job(heartbeat_fraction: float = 0.3) -> Callable: - """Decorator that auto-manages heartbeat for long-running SAQ tasks. - - Spawns a background coroutine that updates the job heartbeat at - `heartbeat_fraction * timeout` intervals. - - Args: - heartbeat_fraction: Fraction of job timeout to use as heartbeat interval. - Defaults to 0.3 (update heartbeat at 30% of timeout elapsed). 
- """ - def decorator(func: Callable) -> Callable: - @functools.wraps(func) - async def wrapper(ctx: dict, **kwargs: Any) -> Any: - job = ctx["job"] - queue: Queue = ctx["queue"] - timeout = job.timeout or 300 - interval = max(10, int(timeout * heartbeat_fraction)) - - async def _heartbeat_loop() -> None: - while True: - await asyncio.sleep(interval) - await queue.update(job, heartbeat=time.time()) - - task = asyncio.create_task(_heartbeat_loop()) - try: - return await func(ctx, **kwargs) - finally: - task.cancel() - - return wrapper - return decorator - - -# Usage -@monitored_job(heartbeat_fraction=0.25) -async def long_running_export(ctx: dict, *, export_id: int) -> dict: - # Heartbeat auto-managed — no manual updates needed - ... -``` - -## Dead Letter / Failed Job Handling - -SAQ marks jobs as `failed` after exhausting retries. Inspect and reprocess failed jobs: - -```python -from saq import Job, Status - -# List failed jobs -async def get_failed_jobs(queue: Queue) -> list[Job]: - return await queue.jobs(status=Status.FAILED) - -# Retry a specific failed job -async def retry_job(queue: Queue, job_id: str) -> None: - job = await queue.job(job_id) - if job and job.status == Status.FAILED: - await queue.retry(job) - -# Bulk retry all failed jobs -async def retry_all_failed(queue: Queue) -> int: - failed = await queue.jobs(status=Status.FAILED) - for job in failed: - await queue.retry(job) - return len(failed) -``` - -### Exponential Backoff via Retry Delay - -SAQ does not natively support per-retry delay, but you can implement backoff by re-enqueueing with a `scheduled` timestamp: - -```python -import time - -async def send_notification(ctx: dict, *, user_id: int, attempt: int = 0) -> None: - try: - await _send(user_id) - except TransientError: - max_attempts = 5 - if attempt < max_attempts: - backoff_seconds = 2 ** attempt # 1, 2, 4, 8, 16 seconds - await ctx["queue"].enqueue( - "send_notification", - user_id=user_id, - attempt=attempt + 1, - 
scheduled=int(time.time()) + backoff_seconds, - timeout=30, - ) -``` - -## Job Chaining (Dependencies) - -SAQ has no native DAG support, but jobs can chain by enqueueing the next step from within a task: - -```python -async def step_one(ctx: dict, *, record_id: int) -> None: - result = await process_step_one(record_id) - # Enqueue step two only after step one succeeds - await ctx["queue"].enqueue( - "step_two", - record_id=record_id, - step_one_result=result, - timeout=120, - ) - - -async def step_two(ctx: dict, *, record_id: int, step_one_result: dict) -> None: - await process_step_two(record_id, step_one_result) - await ctx["queue"].enqueue("step_three", record_id=record_id, timeout=60) -``` - -For fan-out patterns (one job spawns many), gather job references and poll: - -```python -async def fan_out_coordinator(ctx: dict, *, batch_ids: list[int]) -> None: - queue: Queue = ctx["queue"] - # Enqueue all child jobs - child_jobs = [ - await queue.enqueue("process_item", item_id=item_id, timeout=60) - for item_id in batch_ids - ] - # Wait for all children (poll with apply semantics) - results = await asyncio.gather(*[ - queue.apply("process_item", item_id=item_id, timeout=60) - for item_id in batch_ids - ]) -``` - -## Queue Priorities via Multiple Queues - -SAQ does not have built-in priority levels, but you can simulate priority with multiple queues and worker pools: - -```python -from saq import Queue, Worker - -# Separate queues by priority -high_priority_queue = Queue.from_url("redis://localhost", name="high") -low_priority_queue = Queue.from_url("redis://localhost", name="low") - -# High-priority worker: more concurrency, dedicated process -high_worker = Worker( - high_priority_queue, - functions=[critical_task, payment_processing], - concurrency=20, -) - -# Low-priority worker: fewer slots, background work -low_worker = Worker( - low_priority_queue, - functions=[send_digest_email, cleanup_old_records], - concurrency=5, -) -``` - -Enqueue to the appropriate queue 
based on priority: - -```python -await high_priority_queue.enqueue("payment_processing", order_id=123, timeout=30) -await low_priority_queue.enqueue("send_digest_email", user_id=456, timeout=120) -``` - -## Worker Lifecycle Hooks - -All four hooks receive the context dict. Use them to manage shared resources. - -```python -import httpx -from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine - - -async def startup(ctx: dict) -> None: - """Initialize shared resources once per worker process. - - Runs before any jobs are processed. Attach resources to ctx - so all task functions can access them. - """ - ctx["db"] = create_async_engine("postgresql+asyncpg://user:pass@localhost/db") - ctx["http"] = httpx.AsyncClient(timeout=10.0) - ctx["settings"] = load_settings() - - -async def shutdown(ctx: dict) -> None: - """Clean up shared resources when worker stops. - - Runs after all in-flight jobs complete (graceful shutdown). - """ - engine: AsyncEngine = ctx["db"] - await engine.dispose() - http: httpx.AsyncClient = ctx["http"] - await http.aclose() - - -async def before_process(ctx: dict) -> None: - """Called before each individual job starts. - - Use for per-job setup: open a DB session, set up request context, etc. - """ - engine: AsyncEngine = ctx["db"] - ctx["session"] = engine.connect() - - -async def after_process(ctx: dict) -> None: - """Called after each individual job completes (success or failure). - - Use for per-job cleanup: close the DB session, flush metrics, etc. 
- """ - session = ctx.get("session") - if session: - await session.close() - del ctx["session"] - - -worker = Worker( - queue, - functions=[...], - startup=startup, - shutdown=shutdown, - before_process=before_process, - after_process=after_process, -) -``` - -## Graceful Shutdown - -Handle OS signals to allow in-flight jobs to complete before the process exits: - -```python -import asyncio -import signal - - -async def main() -> None: - worker = Worker(queue, functions=[...], concurrency=10) - - loop = asyncio.get_event_loop() - - def _handle_signal() -> None: - asyncio.create_task(worker.stop()) - - loop.add_signal_handler(signal.SIGTERM, _handle_signal) - loop.add_signal_handler(signal.SIGINT, _handle_signal) - - await worker.start() - - -asyncio.run(main()) -``` - -## Postgres Backend - -Use Postgres when: - -- You need durable job persistence across Redis restarts -- You want to query job history with SQL -- Your infrastructure does not include Redis -- You need transactional job enqueueing (enqueue inside a DB transaction) - -```python -from saq import Queue - -# Postgres backend — requires asyncpg driver -queue = Queue.from_url("postgresql+asyncpg://user:pass@localhost/mydb") -``` - -**Differences from Redis backend:** - -| Aspect | Redis | Postgres | -|---|---|---| -| Persistence | In-memory (AOF/RDB optional) | Durable by default | -| Query job history | Limited | Full SQL access | -| Throughput | Higher | Lower (row locking) | -| Infra requirement | Redis instance | Existing Postgres | -| Transactional enqueue | No | Yes (same connection) | - -**Transactional enqueueing with Postgres:** - -```python -from sqlalchemy.ext.asyncio import AsyncSession - -async def create_order_and_enqueue(session: AsyncSession, order_data: dict) -> None: - # Both the DB write and job enqueue succeed or fail together - async with session.begin(): - order = Order(**order_data) - session.add(order) - await session.flush() - # Enqueue using the same Postgres 
connection/transaction - await queue.enqueue("process_order", order_id=order.id, timeout=120) -``` - -## Job Deduplication Patterns - -The `key` parameter prevents duplicate jobs from being queued: - -```python -# Per-user sync: only one sync job per user at a time -await queue.enqueue( - "sync_user_data", - user_id=user_id, - key=f"sync-user-{user_id}", - timeout=300, -) - -# Per-resource with versioning: new version supersedes old -await queue.enqueue( - "reindex_document", - doc_id=doc_id, - key=f"reindex-doc-{doc_id}", # replaces any pending reindex for this doc - timeout=60, -) - -# Time-windowed dedup: one report per hour per org -import datetime -hour = datetime.datetime.utcnow().strftime("%Y%m%d%H") -await queue.enqueue( - "generate_hourly_report", - org_id=org_id, - key=f"report-{org_id}-{hour}", - timeout=120, -) -``` diff --git a/plugins/flow/skills/shadcn-tools/SKILL.md b/plugins/flow/skills/shadcn-tools/SKILL.md deleted file mode 100644 index 577868c..0000000 --- a/plugins/flow/skills/shadcn-tools/SKILL.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -name: shadcn-tools -description: "Use when editing shadcn/ui code, components.json, cn() utility, Radix primitives, shadcn add workflows, dialogs, forms, data tables, command palettes, or Tailwind component composition." ---- - -# shadcn/ui (Flow Tools) - - - -## 🚀 Official shadcn/ui Skills (Highly Recommended) - -For component discovery, CLI mastery, and pattern enforcement, we highly recommend installing the official shadcn/ui agent skills: - -- **shadcn**: Official skill for adding components and ensuring proper composition. - -**Installation:** - -```bash -npx skills add shadcn/ui -``` - -## Supplemental Patterns - -The patterns below provide additional context for Flow-specific copy-paste workflows and SPA navigation. - ---- - -## SPA Integration Notes - -When using shadcn/ui components within a Single Page Application (SPA), ensure navigation does not cause full page reloads. 
Use `asChild` to pass the routing `Link` child directly. - - - -```tsx -import { Link } from '@tanstack/react-router' -import { Button } from "@/components/ui/button" - - -``` - - - - - - -## Guardrails - -- **Use `asChild` for Routing:** When integrating with SPA routers (e.g., TanStack Router, React Router), always use the `asChild` prop on shadcn components to pass the routing `Link` as a child. This prevents invalid nested links and ensures proper event handling. -- **Prefer Semantic Colors:** Use shadcn's semantic color variables (e.g., `text-primary`, `bg-secondary`) instead of hardcoded hex codes or arbitrary Tailwind colors. This ensures the application remains themable and supports dark mode out of the box. -- **Avoid Hardcoded Values:** Never use arbitrary padding, margin, or font sizes. Stick to the Tailwind utility classes provided by the shadcn configuration to maintain design consistency. -- **Keep Components Atomic:** Refactor shadcn components only for global application consistency. Avoid making a component "do too much"; use composition instead. - - - -## Validation - -- **Confirm `cn()` Utility Usage:** Audit component code to ensure the `cn()` utility is used for all class merging, especially when combining base component styles with variant-specific classes or user-provided props. -- **Audit Semantic Theming:** Check that custom styles do not bypass the CSS variable-based theming system (e.g., using `text-red-500` instead of a variable defined in the theme). 
- diff --git a/plugins/flow/skills/shadcn-tools/agents/openai.yaml b/plugins/flow/skills/shadcn-tools/agents/openai.yaml deleted file mode 100644 index ddd1219..0000000 --- a/plugins/flow/skills/shadcn-tools/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "shadcn/ui Tools" - short_description: "Flow supplemental shadcn/ui, Radix, Tailwind, forms, dialogs, and tables" diff --git a/plugins/flow/skills/shadcn-tools/references/best_practices.md b/plugins/flow/skills/shadcn-tools/references/best_practices.md deleted file mode 100644 index 916730d..0000000 --- a/plugins/flow/skills/shadcn-tools/references/best_practices.md +++ /dev/null @@ -1,47 +0,0 @@ -# ShadCN/ui Best Practices 2026 - -## Core Principles - -### 1. Full Code Ownership - -- **Copy, Don't Install**: Components are added to your source tree (`src/components/ui/`). Modify them freely to fit your design system. -- **Avoid Over-Styling**: Keep local styles minimal. Handle full-page layout in parent containers. - -### 2. Accessibility (Radix Primitives) - -- **`asChild` Prop**: Crucial for SPA routing. Passes navigation behavior to children (e.g., `Link`) without breaking button semantics. -- **Interactive Safeguards**: Never wrap fully interactive components inside button tags. Let Radix handle ARIA attributes. - ---- - -## Routing Integrations - -### 1. TanStack Router (File-Based) - -- **Wrapper Components**: Standard `Sheet` and `Dialog` can cause portal animation overlaps or trigger strict route rendering loops. Wrap them in route-aware contexts if animations get stuck. -- **Component Placement**: Avoid putting temporary or dialog components directly in route files. Place them in `./routes/-components/` or `./features/` to prevent TanStack Router from creating routes for them. -- **Type-Safe Links**: - - ```tsx - import { Link } from '@tanstack/react-router' - import { Button } from "@/components/ui/button" - - - ``` - -### 2. 
React Router - -- **Standard Support**: Official and mature integration. -- **Layout Composition**: Verify that floating overlays (Portals) resolve correctly inside nested `` structures. - ---- - -## Agent & LLM Best Practices - -When generating UI code using assistants: - -1. **Layout Separation**: Generate components that take styling props for outer margin/padding, rather than baking absolute layouts into the component itself. -2. **Guard Standard Defaults**: Do not change standard Radix hooks inside generated items unless specifically requested. -3. **Type Safety**: Always type inputs securely, especially when integrating with heavy validators like Zod or custom search parameters. diff --git a/plugins/flow/skills/shadcn-tools/references/components.md b/plugins/flow/skills/shadcn-tools/references/components.md deleted file mode 100644 index a58896b..0000000 --- a/plugins/flow/skills/shadcn-tools/references/components.md +++ /dev/null @@ -1,70 +0,0 @@ -# ShadCN Core Components - -## Button - -```tsx -import { Button } from "@/components/ui/button" - -// Variants - - - - -// As child (render as different element) - -``` - -## Input & Label - -```tsx -import { Input } from "@/components/ui/input" -import { Label } from "@/components/ui/label" - -
- - -
-``` - -## Card - -```tsx -import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card" - - - Card Title -

Content goes here

-
-``` - -## Select - -```tsx -import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select" - - -``` - ---- - -## Utilities - -### cn() Function - -Merges Tailwind classes intelligently. - -```tsx -import { type ClassValue, clsx } from "clsx" -import { twMerge } from "tailwind-merge" - -export function cn(...inputs: ClassValue[]) { - return twMerge(clsx(inputs)) -} -``` diff --git a/plugins/flow/skills/shadcn-tools/references/dialogs.md b/plugins/flow/skills/shadcn-tools/references/dialogs.md deleted file mode 100644 index de76bd2..0000000 --- a/plugins/flow/skills/shadcn-tools/references/dialogs.md +++ /dev/null @@ -1,53 +0,0 @@ -# Dialogs & Overlays - -## Dialog (Modal) - -```tsx -import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog" - - - - - - - - Title - -
Content
-
-
-``` - -## Sheet (Side Panel) - -```tsx -import { Sheet, SheetContent, SheetHeader, SheetTitle, SheetTrigger } from "@/components/ui/sheet" - - - - - - - - Title - - - -``` - -## AlertDialog - -```tsx -import { AlertDialog, AlertDialogAction, AlertDialogCancel, AlertDialogContent, AlertDialogDescription, AlertDialogFooter, AlertDialogHeader, AlertDialogTitle, AlertDialogTrigger } from "@/components/ui/alert-dialog" - - - Delete - - Confirm - - Cancel - Continue - - - -``` diff --git a/plugins/flow/skills/shadcn-tools/references/forms.md b/plugins/flow/skills/shadcn-tools/references/forms.md deleted file mode 100644 index 04c3090..0000000 --- a/plugins/flow/skills/shadcn-tools/references/forms.md +++ /dev/null @@ -1,42 +0,0 @@ -# Forms Integration - -Guidance for integrating ShadCN components with `react-hook-form` and `zod`. - -## Basic Form Pattern - -```tsx -import { useForm } from "react-hook-form" -import { zodResolver } from "@hookform/resolvers/zod" -import { z } from "zod" -import { Form, FormControl, FormField, FormItem, FormLabel, FormMessage } from "@/components/ui/form" - -const formSchema = z.object({ - username: z.string().min(2), -}) - -function ProfileForm() { - const form = useForm>({ - resolver: zodResolver(formSchema), - defaultValues: { username: "" }, - }) - - return ( -
- - ( - - Username - - - - )} - /> - - - - ) -} -``` diff --git a/plugins/flow/skills/shadcn-tools/references/shadcn-docs.md b/plugins/flow/skills/shadcn-tools/references/shadcn-docs.md deleted file mode 100644 index 6ee4534..0000000 --- a/plugins/flow/skills/shadcn-tools/references/shadcn-docs.md +++ /dev/null @@ -1,144 +0,0 @@ -# shadcn/ui - -> shadcn/ui is a collection of beautifully-designed, accessible components and a code distribution platform. It is built with TypeScript, Tailwind CSS, and Radix UI primitives. It supports multiple frameworks including Next.js, Vite, Remix, Astro, and more. Open Source. Open Code. AI-Ready. It also comes with a command-line tool to install and manage components and a registry system to publish and distribute code. - -## Overview - -- [Introduction](https://ui.shadcn.com/docs): Core principles—Open Code, Composition, Distribution, Beautiful Defaults, and AI-Ready design. -- [CLI](https://ui.shadcn.com/docs/cli): Command-line tool for installing and managing components. -- [components.json](https://ui.shadcn.com/docs/components-json): Configuration file for customizing the CLI and component installation. -- [Theming](https://ui.shadcn.com/docs/theming): Guide to customizing colors, typography, and design tokens. -- [Changelog](https://ui.shadcn.com/docs/changelog): Release notes and version history. -- [About](https://ui.shadcn.com/docs/about): Credits and project information. - -## Installation - -- [Next.js](https://ui.shadcn.com/docs/installation/next): Install shadcn/ui in a Next.js project. -- [Vite](https://ui.shadcn.com/docs/installation/vite): Install shadcn/ui in a Vite project. -- [Remix](https://ui.shadcn.com/docs/installation/remix): Install shadcn/ui in a Remix project. -- [Astro](https://ui.shadcn.com/docs/installation/astro): Install shadcn/ui in an Astro project. -- [Laravel](https://ui.shadcn.com/docs/installation/laravel): Install shadcn/ui in a Laravel project. 
-- [Gatsby](https://ui.shadcn.com/docs/installation/gatsby): Install shadcn/ui in a Gatsby project. -- [React Router](https://ui.shadcn.com/docs/installation/react-router): Install shadcn/ui in a React Router project. -- [TanStack Router](https://ui.shadcn.com/docs/installation/tanstack-router): Install shadcn/ui in a TanStack Router project. -- [TanStack Start](https://ui.shadcn.com/docs/installation/tanstack): Install shadcn/ui in a TanStack Start project. -- [Manual Installation](https://ui.shadcn.com/docs/installation/manual): Manually install shadcn/ui without the CLI. - -## Components - -### Form & Input - -- [Form](https://ui.shadcn.com/docs/components/form): Building forms with React Hook Form and Zod validation. -- [Field](https://ui.shadcn.com/docs/components/field): Field component for form inputs with labels and error messages. -- [Button](https://ui.shadcn.com/docs/components/button): Button component with multiple variants. -- [Button Group](https://ui.shadcn.com/docs/components/button-group): Group multiple buttons together. -- [Input](https://ui.shadcn.com/docs/components/input): Text input component. -- [Input Group](https://ui.shadcn.com/docs/components/input-group): Input component with prefix and suffix addons. -- [Input OTP](https://ui.shadcn.com/docs/components/input-otp): One-time password input component. -- [Textarea](https://ui.shadcn.com/docs/components/textarea): Multi-line text input component. -- [Checkbox](https://ui.shadcn.com/docs/components/checkbox): Checkbox input component. -- [Radio Group](https://ui.shadcn.com/docs/components/radio-group): Radio button group component. -- [Select](https://ui.shadcn.com/docs/components/select): Select dropdown component. -- [Switch](https://ui.shadcn.com/docs/components/switch): Toggle switch component. -- [Slider](https://ui.shadcn.com/docs/components/slider): Slider input component. -- [Calendar](https://ui.shadcn.com/docs/components/calendar): Calendar component for date selection. 
-- [Date Picker](https://ui.shadcn.com/docs/components/date-picker): Date picker component combining input and calendar. -- [Combobox](https://ui.shadcn.com/docs/components/combobox): Searchable select component with autocomplete. -- [Label](https://ui.shadcn.com/docs/components/label): Form label component. - -### Layout & Navigation - -- [Accordion](https://ui.shadcn.com/docs/components/accordion): Collapsible accordion component. -- [Breadcrumb](https://ui.shadcn.com/docs/components/breadcrumb): Breadcrumb navigation component. -- [Navigation Menu](https://ui.shadcn.com/docs/components/navigation-menu): Accessible navigation menu with dropdowns. -- [Sidebar](https://ui.shadcn.com/docs/components/sidebar): Collapsible sidebar component for app layouts. -- [Tabs](https://ui.shadcn.com/docs/components/tabs): Tabbed interface component. -- [Separator](https://ui.shadcn.com/docs/components/separator): Visual divider between content sections. -- [Scroll Area](https://ui.shadcn.com/docs/components/scroll-area): Custom scrollable area with styled scrollbars. -- [Resizable](https://ui.shadcn.com/docs/components/resizable): Resizable panel layout component. - -### Overlays & Dialogs - -- [Dialog](https://ui.shadcn.com/docs/components/dialog): Modal dialog component. -- [Alert Dialog](https://ui.shadcn.com/docs/components/alert-dialog): Alert dialog for confirmation prompts. -- [Sheet](https://ui.shadcn.com/docs/components/sheet): Slide-out panel component (drawer). -- [Drawer](https://ui.shadcn.com/docs/components/drawer): Mobile-friendly drawer component using Vaul. -- [Popover](https://ui.shadcn.com/docs/components/popover): Floating popover component. -- [Tooltip](https://ui.shadcn.com/docs/components/tooltip): Tooltip component for additional context. -- [Hover Card](https://ui.shadcn.com/docs/components/hover-card): Card that appears on hover. -- [Context Menu](https://ui.shadcn.com/docs/components/context-menu): Right-click context menu. 
-- [Dropdown Menu](https://ui.shadcn.com/docs/components/dropdown-menu): Dropdown menu component. -- [Menubar](https://ui.shadcn.com/docs/components/menubar): Horizontal menubar component. -- [Command](https://ui.shadcn.com/docs/components/command): Command palette component (cmdk). - -### Feedback & Status - -- [Alert](https://ui.shadcn.com/docs/components/alert): Alert component for messages and notifications. -- [Toast](https://ui.shadcn.com/docs/components/toast): Toast notification component using Sonner. -- [Progress](https://ui.shadcn.com/docs/components/progress): Progress bar component. -- [Spinner](https://ui.shadcn.com/docs/components/spinner): Loading spinner component. -- [Skeleton](https://ui.shadcn.com/docs/components/skeleton): Skeleton loading placeholder. -- [Badge](https://ui.shadcn.com/docs/components/badge): Badge component for labels and status indicators. -- [Empty](https://ui.shadcn.com/docs/components/empty): Empty state component for no data scenarios. - -### Display & Media - -- [Avatar](https://ui.shadcn.com/docs/components/avatar): Avatar component for user profiles. -- [Card](https://ui.shadcn.com/docs/components/card): Card container component. -- [Table](https://ui.shadcn.com/docs/components/table): Table component for displaying data. -- [Data Table](https://ui.shadcn.com/docs/components/data-table): Advanced data table with sorting, filtering, and pagination. -- [Chart](https://ui.shadcn.com/docs/components/chart): Chart components using Recharts. -- [Carousel](https://ui.shadcn.com/docs/components/carousel): Carousel component using Embla Carousel. -- [Aspect Ratio](https://ui.shadcn.com/docs/components/aspect-ratio): Container that maintains aspect ratio. -- [Typography](https://ui.shadcn.com/docs/components/typography): Typography styles and components. -- [Item](https://ui.shadcn.com/docs/components/item): Generic item component for lists and menus. 
-- [Kbd](https://ui.shadcn.com/docs/components/kbd): Keyboard shortcut display component. - -### Misc - -- [Collapsible](https://ui.shadcn.com/docs/components/collapsible): Collapsible container component. -- [Toggle](https://ui.shadcn.com/docs/components/toggle): Toggle button component. -- [Toggle Group](https://ui.shadcn.com/docs/components/toggle-group): Group of toggle buttons. -- [Pagination](https://ui.shadcn.com/docs/components/pagination): Pagination component for lists and tables. - -## Dark Mode - -- [Dark Mode](https://ui.shadcn.com/docs/dark-mode): Overview of dark mode implementation. -- [Dark Mode - Next.js](https://ui.shadcn.com/docs/dark-mode/next): Dark mode setup for Next.js. -- [Dark Mode - Vite](https://ui.shadcn.com/docs/dark-mode/vite): Dark mode setup for Vite. -- [Dark Mode - Astro](https://ui.shadcn.com/docs/dark-mode/astro): Dark mode setup for Astro. -- [Dark Mode - Remix](https://ui.shadcn.com/docs/dark-mode/remix): Dark mode setup for Remix. - -## Forms - -- [Forms Overview](https://ui.shadcn.com/docs/forms): Guide to building forms with shadcn/ui. -- [React Hook Form](https://ui.shadcn.com/docs/forms/react-hook-form): Using shadcn/ui with React Hook Form. -- [TanStack Form](https://ui.shadcn.com/docs/forms/tanstack-form): Using shadcn/ui with TanStack Form. -- [Forms - Next.js](https://ui.shadcn.com/docs/forms/next): Building forms in Next.js with Server Actions. - -## Advanced - -- [Monorepo](https://ui.shadcn.com/docs/monorepo): Using shadcn/ui in a monorepo setup. -- [React 19](https://ui.shadcn.com/docs/react-19): React 19 support and migration guide. -- [Tailwind CSS v4](https://ui.shadcn.com/docs/tailwind-v4): Tailwind CSS v4 support and setup. -- [JavaScript](https://ui.shadcn.com/docs/javascript): Using shadcn/ui with JavaScript (no TypeScript). -- [Figma](https://ui.shadcn.com/docs/figma): Figma design resources. -- [v0](https://ui.shadcn.com/docs/v0): Generating UI with v0 by Vercel. 
- -## MCP Server - -- [MCP Server](https://ui.shadcn.com/docs/mcp): Model Context Protocol server for AI integrations. Allows AI assistants to browse, search, and install components from registries using natural language. Works with Claude Code, Cursor, VS Code (GitHub Copilot), Codex and more. - -## Registry - -- [Registry Overview](https://ui.shadcn.com/docs/registry): Creating and publishing your own component registry. -- [Getting Started](https://ui.shadcn.com/docs/registry/getting-started): Set up your own registry. -- [Examples](https://ui.shadcn.com/docs/registry/examples): Example registries. -- [FAQ](https://ui.shadcn.com/docs/registry/faq): Common questions about registries. -- [Authentication](https://ui.shadcn.com/docs/registry/authentication): Adding authentication to your registry. -- [Registry MCP](https://ui.shadcn.com/docs/registry/mcp): MCP integration for registries. - -### Registry Schemas - -- [Registry Schema](https://ui.shadcn.com/schema/registry.json): JSON Schema for registry index files. Defines the structure for a collection of components, hooks, pages, etc. Requires name, homepage, and items array. -- [Registry Item Schema](https://ui.shadcn.com/schema/registry-item.json): JSON Schema for individual registry items. Defines components, hooks, themes, and other distributable code with properties for dependencies, files, Tailwind config, CSS variables, and more. diff --git a/plugins/flow/skills/shadcn-tools/references/tables.md b/plugins/flow/skills/shadcn-tools/references/tables.md deleted file mode 100644 index d72bb31..0000000 --- a/plugins/flow/skills/shadcn-tools/references/tables.md +++ /dev/null @@ -1,47 +0,0 @@ -# Data Tables - -## Concept - -ShadCN uses `@tanstack/react-table` to provide headless table logic, which you then style yourself using the core `
` component. - ---- - -## Standard Pattern - -```tsx -import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" -import { useReactTable, flexRender } from "@tanstack/react-table" - -function DataTable({ columns, data }: { columns: ColumnDef[], data: TData[] }) { - const table = useReactTable({ data, columns, getCoreRowModel: getCoreRowModel() }) - - return ( -
-
- - {table.getHeaderGroups().map((headerGroup) => ( - - {headerGroup.headers.map((header) => ( - - {flexRender(header.column.columnDef.header, header.getContext())} - - ))} - - ))} - - - {table.getRowModel().rows.map((row) => ( - - {row.getVisibleCells().map((cell) => ( - - {flexRender(cell.column.columnDef.cell, cell.getContext())} - - ))} - - ))} - -
- - ) -} -``` diff --git a/plugins/flow/skills/sphinx/SKILL.md b/plugins/flow/skills/sphinx/SKILL.md deleted file mode 100644 index 32edafb..0000000 --- a/plugins/flow/skills/sphinx/SKILL.md +++ /dev/null @@ -1,273 +0,0 @@ ---- -name: sphinx -description: "Use when editing Sphinx docs, conf.py, .rst files, docs/source, autodoc, Read the Docs builds, Shibuya or Immaterial themes, Wasm extensions, VHS terminal recordings, or Sphinx CI." ---- - -# Sphinx Skill - -Expert knowledge for maintaining and expanding Sphinx documentation workspaces. - -## Quick Reference - -### conf.py Setup - -```python -# docs/conf.py -project = "MyProject" -copyright = "2025, My Org" -author = "My Org" - -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_copybutton", - "sphinx_design", -] - -# Theme (choose one) -html_theme = "shibuya" # or "sphinx_immaterial" -html_static_path = ["_static"] - -# Autodoc -autodoc_member_order = "bysource" -autodoc_typehints = "description" -autodoc_class_signature = "separated" - -# Intersphinx (cross-project links) -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), - "sqlalchemy": ("https://docs.sqlalchemy.org/en/20/", None), -} -``` - -### Key RST Patterns - -```rst -.. Title and sections (heading hierarchy) -========== -Page Title -========== - -Section -------- - -Subsection -^^^^^^^^^^ - -.. Cross-references -:ref:`label-name` -:doc:`other-page` -:func:`mymodule.myfunction` - -.. Autodoc directives -.. automodule:: mypackage.module - :members: - :undoc-members: - :show-inheritance: - -.. autoclass:: mypackage.MyClass - :members: - :special-members: __init__ - -.. Code blocks -.. code-block:: python - - def hello(): - print("world") - -.. Include from file with markers -.. literalinclude:: ../../examples/demo.py - :language: python - :start-after: # start-example - :end-before: # end-example - -.. Admonitions -.. note:: - Important information here. 
- -.. warning:: - Dangerous operation ahead. -``` - -### Autodoc Configuration - -- `autodoc_member_order = "bysource"` -- preserves source order (not alphabetical). -- `autodoc_typehints = "description"` -- puts type hints in parameter descriptions, not signatures. -- `napoleon` extension -- enables Google-style and NumPy-style docstrings. -- `intersphinx` -- links to external project docs (Python stdlib, SQLAlchemy, etc.) without duplicating content. - - - -## Workflow - -### Step 1: Project Structure - -Set up the docs directory with `conf.py`, `index.rst`, and section directories. Use a hidden toctree in `index.rst` for navigation. - -```text -docs/ -├── conf.py -├── index.rst -├── getting-started/ -│ ├── index.rst -│ └── installation.rst -├── api/ -│ ├── index.rst -│ └── modules.rst -├── _static/ -└── _templates/ -``` - -### Step 2: Configure Extensions - -Enable `autodoc`, `intersphinx`, `napoleon`, `viewcode`, and theme-specific extensions. Pin Sphinx and extension versions in `pyproject.toml`. - -### Step 3: Write Content - -Split long guides into per-topic pages. Keep each page scoped to one concept. Use `literalinclude` with markers for code examples. Prefer `sphinx_design` grids and cards for navigation hubs. - -### Step 4: Build and Test - -```bash -# Local build -sphinx-build -b html docs/ docs/_build/html -W --keep-going - -# Watch mode (with sphinx-autobuild) -sphinx-autobuild docs/ docs/_build/html -``` - -### Step 5: CI/CD Integration - -Add a GitHub Actions workflow that builds docs on every PR. Fail the build on warnings (`-W` flag). Deploy to GitHub Pages or ReadTheDocs on merge to main. - - - - - -## Guardrails - -- **Pin Sphinx version** -- specify `sphinx>=8.0,<9` in `pyproject.toml` to prevent surprise breaking changes. Pin extension versions too. -- **Use intersphinx for cross-project links** -- never hardcode URLs to external docs. Use `:func:`, `:class:`, `:doc:` roles with intersphinx mappings. 
-- **Test builds in CI** -- run `sphinx-build -W` (warnings as errors) in CI. Catch broken references, missing modules, and RST syntax errors before merge. -- **`autodoc_typehints = "description"`** -- keeps signatures readable; type info appears in parameter docs. -- **One concept per page** -- split long guides into focused pages linked via toctree. Readers find content faster. -- **`literalinclude` over inline code** -- keeps examples runnable and testable. Use `start-after`/`end-before` markers. - - - - - -### Validation Checkpoint - -Before delivering Sphinx configurations, verify: - -- [ ] Sphinx and extension versions are pinned in pyproject.toml -- [ ] `intersphinx_mapping` is configured for all external references -- [ ] `sphinx-build -W` completes without warnings -- [ ] Autodoc picks up all public modules/classes -- [ ] Cross-references (`:ref:`, `:doc:`, `:func:`) resolve correctly -- [ ] CI workflow builds docs and fails on warnings - - - - - -## Example - -**Task:** Minimal conf.py and RST page with autodoc. - -**`docs/conf.py`:** - -```python -project = "Acme" -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_copybutton", - "sphinx_design", -] - -html_theme = "shibuya" - -autodoc_member_order = "bysource" -autodoc_typehints = "description" - -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), -} -``` - -**`docs/index.rst`:** - -```rst -===== -Acme -===== - -Welcome to Acme's documentation. - -.. toctree:: - :hidden: - :maxdepth: 2 - - getting-started/index - api/index -``` - -**`docs/api/index.rst`:** - -```rst -============= -API Reference -============= - -.. automodule:: acme.core - :members: - :undoc-members: - :show-inheritance: - -.. 
autoclass:: acme.client.AcmeClient - :members: - :special-members: __init__ -``` - - - ---- - -## References Index - -For detailed guides on specific themes and extensions, refer to the following documents: - -### Themes - -- **[Sphinx Immaterial Theme](references/immaterial-theme.md)** -- Configuration for the Material Design theme. -- **[Shibuya Theme](references/shibuya.md)** -- Configuration for the Shibuya theme. - -### Extensions & Demos - -- **[Wasm Playground](references/wasm-playground.md)** -- Integrating interactive Wasm playgrounds. -- **[VHS Terminal Recordings](references/vhs-demos.md)** -- Guidelines for creating and embedding VHS recordings. - -### Infrastructure - -- **[CI/CD Pipelines](references/ci-cd.md)** -- GitHub Actions workflows for building and deploying documentation. - ---- - -## Official References - -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- [Python](https://github.com/cofin/flow/blob/main/templates/styleguides/languages/python.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. 
diff --git a/plugins/flow/skills/sphinx/agents/openai.yaml b/plugins/flow/skills/sphinx/agents/openai.yaml deleted file mode 100644 index 242062c..0000000 --- a/plugins/flow/skills/sphinx/agents/openai.yaml +++ /dev/null @@ -1,3 +0,0 @@ -interface: - display_name: "Sphinx" - short_description: "Sphinx docs, conf.py, RST, autodoc, themes, CI, and doc builds" diff --git a/plugins/flow/skills/sphinx/references/ci-cd.md b/plugins/flow/skills/sphinx/references/ci-cd.md deleted file mode 100644 index f8ad89f..0000000 --- a/plugins/flow/skills/sphinx/references/ci-cd.md +++ /dev/null @@ -1,100 +0,0 @@ - -# Sphinx CI/CD Workflow - -## Overview - -Use this skill to build robust, automated documentation pipelines inside GitHub Actions. Complex documentation workflows often separate asset building templates (shell recorders like VHS) from core compilation nodes to keep static outputs fresh upon landing merges. - -## Workflow Setup (`.github/workflows/docs.yml`) - -### 1. Stage 1: Generate Dynamic Assets (VHS) - -Run shell scripts mapping to animations BEFORE compiling Sphinx. This ensures the compilation stage receives fresh artifact caches. - -```yaml -jobs: - generate-demos: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install supporting components - run: sudo apt-get install -y ffmpeg ttyd - - - name: Install VHS (via Go) - uses: actions/setup-go@v5 - with: - go-version: ">=1.21" - - - run: | - go install github.com/charmbracelet/vhs@latest - echo "$HOME/go/bin" >> $GITHUB_PATH - - - name: Compile Tape Scripts - run: | - for tape in docs/_tapes/*.tape; do - vhs "$tape" - done - - - name: Cache rendered visuals - uses: actions/upload-artifact@v4 - with: - name: demo-visuals - path: docs/_static/demos/ -``` - -### 2. Stage 2: Sphinx Compilation - -Pull generated assets into standard template builds utilizing standard workflows. 
- -```yaml - build-docs: - needs: generate-demos - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Download visuals cache - uses: actions/download-artifact@v4 - with: - name: demo-visuals - path: docs/_static/demos/ - - - name: Install uv - uses: astral-sh/setup-uv@v5 - - - name: Compile Static Docs - run: | - uv sync --all-extras --dev - uv run sphinx-build -b html docs docs/_build/html - - - name: Upload build payload - uses: actions/upload-pages-artifact@v3 - with: - path: docs/_build/html/ -``` - -### 3. Stage 3: Deploy - -Push strictly from the compiled payload artifact sets: - -```yaml - deploy: - needs: build-docs - runs-on: ubuntu-latest - permissions: - pages: write - id-token: write - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - steps: - - id: deployment - uses: actions/deploy-pages@v4 -``` - -## Best Practices - -- **Separate Pre-renderers**: Keeps asset builder failures isolated from core page compilation cycles; easily skips tape generation if triggers specify visual frames didn't mutate. -- **Package Management with `uv`**: Avoid slow legacy standard setups; standard sync groups accelerate pipeline steps immensely when utilizing multi-threaded synchronization models. -- **Sandbox permissions**: Avoid Supply Chain vulnerabilities by loading script execution tools strictly inside isolated build components without mounting secret keys during demo rendering. diff --git a/plugins/flow/skills/sphinx/references/immaterial-theme.md b/plugins/flow/skills/sphinx/references/immaterial-theme.md deleted file mode 100644 index ac3e853..0000000 --- a/plugins/flow/skills/sphinx/references/immaterial-theme.md +++ /dev/null @@ -1,153 +0,0 @@ - -# Sphinx Immaterial Theme Workflow - -## Overview - -Use this skill to configure and optimize the `sphinx-immaterial` theme for documentation projects. It provides a modern, responsive interface with rich navigation, search, and code annotation features. 
- -## Theme Selection: Immaterial vs Shibuya - -When choosing between `sphinx-immaterial` and `sphinx-shibuya`: - -| Feature/Aspect | Sphinx Immaterial | Sphinx Shibuya | -| :--- | :--- | :--- | -| **Aesthetic** | Google Material Design (dense, card-like) | Minimalist, clean layout | -| **Ideal For** | API-heavy docs, dense references, dark mode | Reading-focused guides, structured tutorials | -| **Best Feature** | Sticky TOC, Code annotations, Rich Search | Multi-level sidebar, elegant typography | - -Choose **Immaterial** if you want absolute feature density and interactive elements (like annotations) that make documenting technical systems highly scannable. - -## Configuration (`docs/conf.py`) - -### 1. Dependencies - -Ensure your environment configuration (e.g., `pyproject.toml`) includes: - -```toml -[project.optional-dependencies] -docs = [ - "sphinx>=8.0.0", - "sphinx-immaterial>=0.13.0", - "myst-parser>=4.0.0", - "sphinx-copybutton>=0.5.0", - "sphinx-design>=0.6.0", -] -``` - -### 2. 
Theme Setup - -```python -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "myst_parser", - "sphinx_copybutton", - "sphinx_design", - "sphinx_immaterial", -] - -html_theme = "sphinx_immaterial" -html_static_path = ["_static"] -html_css_files = ["custom.css"] - -html_theme_options = { - "icon": { - "repo": "fontawesome/brands/github", - "logo": "material/database", # Adjust as workspace requires - }, - "palette": [ - { - "media": "(prefers-color-scheme: light)", - "scheme": "default", - "primary": "light-green", - "accent": "light-blue", - "toggle": { - "icon": "material/lightbulb", - "name": "Switch to dark mode", - }, - }, - { - "media": "(prefers-color-scheme: dark)", - "scheme": "slate", - "primary": "light-green", - "accent": "light-blue", - "toggle": { - "icon": "material/lightbulb-outline", - "name": "Switch to light mode", - }, - }, - ], - "features": [ - "content.action.edit", - "content.action.view", - "content.code.annotate", - "content.code.copy", - "navigation.expand", - "navigation.footer", - "navigation.instant", - "navigation.sections", - "navigation.tabs", - "navigation.tabs.sticky", - "navigation.top", - "navigation.tracking", - "search.highlight", - "search.share", - "toc.follow", - "toc.sticky", - ], -} -``` - -### 3. 
Custom Admonitions - -Add custom directive nodes in `conf.py` to style specific system alerts: - -```python -sphinx_immaterial_custom_admonitions = [ - { - "name": "system-note", - "title": "System Note", - "icon": "material/cogs", - "color": (11, 87, 208), # RGB tuple for theme borders - "classes": ["info"], - }, -] -``` - -## Custom CSS Enhancements (`_static/custom.css`) - -Apply these baselines to lift visual hierarchy: - -```css -:root { - --md-tooltip-width: 600px; -} - -/* Hovering cards from sphinx-design */ -.sd-card { - border-radius: 1rem; - transition: box-shadow 0.2s ease, transform 0.2s ease; -} - -.sd-card:hover { - box-shadow: 0 4px 16px rgb(0 0 0 / 12%); - transform: translateY(-2px); -} - -/* Code block container aesthetics */ -.code-block-caption { - margin-bottom: 0.5rem; - font-weight: 600; -} - -pre > code { - border-radius: 0.4rem; -} -``` - -## Best Practices - -- **MyST extensions**: Activate `colon_fence`, `attrs_block`, and `deflist` in `conf.py` for rich Markdown rendering that doesn't rely on crude tables. -- **Navigation density**: Immaterial benefits from deep hierarchies; do not be afraid to nest toctrees, as the expandable sidebar handles them well. -- **Feature Toggles**: Start with all navigation features enabled, then strip down if layout is crowded for your specific page flow. diff --git a/plugins/flow/skills/sphinx/references/shibuya.md b/plugins/flow/skills/sphinx/references/shibuya.md deleted file mode 100644 index e2f287c..0000000 --- a/plugins/flow/skills/sphinx/references/shibuya.md +++ /dev/null @@ -1,82 +0,0 @@ - -# Sphinx + Shibuya Docs Workflow - -## Overview - -Use this skill to design or rework Sphinx documentation that uses the Shibuya theme. Focus on clean structure, short pages, and Shibuya-friendly directives. - -## Workflow - -### 1) Discover current structure - -- Read `docs/conf.py` for enabled extensions and theme config. -- Scan `docs/index.rst` and top-level section indexes for current toctree structure. 
-- Locate custom extensions in `tools/sphinx_ext/` and reuse them before adding new ones. -- Prefer `docs/examples/` + `literalinclude` patterns for code snippets. - -### 2) Structure for short pages - -- Split long guides into per-topic pages using a section `index.rst` with a hidden toctree. -- Keep each page scoped to one concept or workflow; link out to examples and reference pages. -- Prefer list tables or grid cards for navigation hubs. - -Example Shibuya grid card hub (use with sphinx-design): - -```rst -.. grid:: 1 1 2 4 - :gutter: 2 - :padding: 0 - - .. grid-item-card:: Litestar - :link: frameworks/litestar - :link-type: doc - - .. image:: /_static/logos/litestar.svg - :width: 72 - :align: center - :alt: Litestar -``` - -### 3) Shibuya-compatible extensions to consider - -Use only what the docs actually need, but favor these Shibuya-friendly extensions: - -- `sphinx_design` for grids, cards, and layout components. -- `sphinx_iconify` for icon usage. -- `sphinx_docsearch` for Algolia DocSearch UI. -- `sphinx_tabs.tabs` for tabbed content. -- `sphinx_togglebutton` for collapsible sections. -- `sphinx_datatables` for data tables where sorting/searching helps. -- `sphinx_copybutton`, `sphinx_paramlinks`, `sphinxcontrib.mermaid` as needed. - -### 4) Code samples - -- Use `literalinclude` with `# start-example` / `# end-example` markers. -- Keep examples short and runnable with `pytest` where possible. -- Prefer language-specific highlights (`:language: python`, `:language: sql`). - -### 5) Validation - -- Run `make docs` and address warnings. -- If auto-generated API docs are present, keep them excluded from the main toctree unless explicitly linked. - -## Project conventions (SQLSpec) - -- Prefer examples in `docs/examples/` and reference them with `literalinclude`. -- Keep pages short, avoid heavy emoji usage, and favor neutral tone. -- Use custom directives from `tools/sphinx_ext/` when available (e.g., playground/changelog helpers). 
- -## Official References - -- -- -- -- -- -- - -## Shared Styleguide Baseline - -- Use shared styleguides for generic language/framework rules to reduce duplication in this skill. -- [General Principles](https://github.com/cofin/flow/blob/main/templates/styleguides/general.md) -- Keep this skill focused on tool-specific workflows, edge cases, and integration details. diff --git a/plugins/flow/skills/sphinx/references/vhs-demos.md b/plugins/flow/skills/sphinx/references/vhs-demos.md deleted file mode 100644 index d8593b1..0000000 --- a/plugins/flow/skills/sphinx/references/vhs-demos.md +++ /dev/null @@ -1,69 +0,0 @@ - -# Sphinx VHS Demos Workflow - -## Overview - -Use this skill to generate and manage automated terminal demonstrations using `vhs`. By writing declarative scripts (`.tape`), you can compile reproducible terminal interface visual streams that update on demand without reliance on brittle screen recorders. - -## Execution Workflow - -### 1. Create Tape Scripts (`docs/_tapes/*.tape`) - -Write `.tape` declarative scripts that execute commands as users would. Setup initial configurations like themes and padding. - -**Example Tape Structure (`docs/_tapes/demo.tape`):** - -```text -Output docs/_static/demos/example_command.gif - -Set Theme "Material" -Set FontSize 16 -Set Width 1200 -Set Height 600 -Set Padding 20 - -# Slow down typing speed for clarity -Set TypingSpeed 75ms - -Type "uv run myapp items list" -Sleep 500ms -Enter - -Sleep 3s -``` - -### 2. Local Execution - -Running `vhs` requires supporting tools to render streams accurately in isolation. - -- **Core dependency**: `vhs` -- **Supporting nodes**: `ttyd` (isolates terminal output), `ffmpeg` (compilation) - -To compile local script additions: - -```bash -# Verify setup -vhs --version - -# Compile single tape script into your artifacts directory -vhs docs/_tapes/demo.tape -``` - -### 3. 
Embedding inside Sphinx - -Standard image syntax embeds the rendered media directly in your guide pages: - -```markdown -## assessment execution - -Here is how list assessment works visually: - -![Assessment Demo](/_static/demos/example_command.gif) -``` - -## Best Practices - -- **Static output locations**: Always route Outputs to `docs/_static/demos/*.gif` so standard theme modules detect and package them smoothly. -- **Sandbox isolation**: In environments utilizing complex setups, isolate rendering via `docker` configurations if local dependency chains conflict. -- **Bypassing Safety Checks**: If tape commands intentionally execute live endpoints requiring security triggers, supply bypass flags scoped only for the visualization step without tampering with codebase defenses. -- **Framerate guidelines**: Favor standard frame rates; very fast speeds make scanning harder for new users. diff --git a/plugins/flow/skills/sphinx/references/wasm-playground.md b/plugins/flow/skills/sphinx/references/wasm-playground.md deleted file mode 100644 index 93127f5..0000000 --- a/plugins/flow/skills/sphinx/references/wasm-playground.md +++ /dev/null @@ -1,86 +0,0 @@ - -# Sphinx Wasm Playground Workflow - -## Overview - -Use this skill to add interactive WebAssembly (Wasm) playgrounds to Sphinx documentation workflows. It involves creating custom Docutils directive nodes that render HTML templates linked to browser-based execution runtimes like Pyodide. - -## Implementation Strategy - -The standard pattern utilizes a **custom Sphinx extension** residing in `tools/sphinx_ext/` combined with an HTML template. - -### 1. 
Custom Directive (`tools/sphinx_ext/playground.py`) - -Create a directive class that generates rendered nodes containing script contexts: - -```python -"""Sphinx directive for Pyodide execution.""" -from __future__ import annotations - -from pathlib import Path -from typing import Any -from uuid import uuid4 - -from docutils import nodes -from docutils.parsers.rst import Directive -from jinja2 import Environment, FileSystemLoader, select_autoescape - -TEMPLATE_NAME = "playground_template.html" - -class WasmPlayground(Directive): - """Embed a Wasm-powered playground in the docs.""" - has_content = False - - def run(self) -> list[nodes.Node]: - playground_id = uuid4().hex - env = Environment( - loader=FileSystemLoader(Path(__file__).parent), - autoescape=select_autoescape(["html", "xml"]) - ) - template = env.get_template(TEMPLATE_NAME) - # Pass identifier or pre-seeded script contents back to layout - rendered = template.render(id=playground_id) - return [nodes.raw(text=rendered, format="html")] - -def setup(app: Any) -> dict[str, Any]: - app.add_directive("wasm-playground", WasmPlayground) - return {"version": "1.0", "parallel_read_safe": True, "parallel_write_safe": True} -``` - -### 2. HTML Integration (`tools/sphinx_ext/playground_template.html`) - -Ensure that your template incorporates essential script calls that interface well with standard template assets: - -```html -
- -
- - -``` - -### 3. Configuration (`conf.py`) - -Hook the extension into your build cycle: - -```python -import sys -from pathlib import Path - -# Ensure Sphinx can find local extensions -sys.path.insert(0, str(Path("../").resolve())) - -extensions = [ - # ... other extensions - "tools.sphinx_ext.playground", -] -``` - -## Integration best practices - -- **Sandboxed execution**: Favor loading heavy Wasm environments (like Pyodide libraries) in `