From d9a06239bbd3eee2983d8a5c821445b534733a73 Mon Sep 17 00:00:00 2001 From: xintaofei Date: Wed, 17 Jun 2026 11:30:37 +0800 Subject: [PATCH 1/6] chore(acp): bump claude code to 0.46.0 --- src-tauri/src/acp/registry.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src-tauri/src/acp/registry.rs b/src-tauri/src/acp/registry.rs index 819c23f9..17216250 100644 --- a/src-tauri/src/acp/registry.rs +++ b/src-tauri/src/acp/registry.rs @@ -143,8 +143,8 @@ pub fn get_agent_meta(agent_type: AgentType) -> AcpAgentMeta { name: "Claude Code", description: "ACP wrapper for Anthropic's Claude", distribution: AgentDistribution::Npx { - version: "0.45.0", - package: "@agentclientprotocol/claude-agent-acp@0.45.0", + version: "0.46.0", + package: "@agentclientprotocol/claude-agent-acp@0.46.0", cmd: "claude-agent-acp", args: &[], env: &[], @@ -376,8 +376,8 @@ mod tests { fn registry_pins_current_acp_agent_versions() { assert_npx_version( AgentType::ClaudeCode, - "0.45.0", - "@agentclientprotocol/claude-agent-acp@0.45.0", + "0.46.0", + "@agentclientprotocol/claude-agent-acp@0.46.0", None, ); assert_npx_version( From 0a25837488e52622168c81f841bd304d05987d7d Mon Sep 17 00:00:00 2001 From: xintaofei Date: Wed, 17 Jun 2026 19:02:25 +0800 Subject: [PATCH 2/6] chore(experts): sync superpowers v6.0.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the bundled expert skills from superpowers v5.1.0 to v6.0.2. The v6.0.0 rewrite reshapes subagent-driven-development — a single task reviewer replaces the separate spec and code-quality reviewers, with new task-brief/review-package helper scripts — adds Global Constraints and per-task Interfaces to writing-plans, hardens the brainstorming visual companion's security model, and makes the skills vendor-neutral with per-harness tool references (adding Claude Code, Pi and Antigravity). 
Skills are vendored verbatim from upstream; experts.toml is unchanged since the skill ids are stable. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../experts/skills/brainstorming/SKILL.md | 15 +- .../brainstorming/scripts/frame-template.html | 51 +-- .../skills/brainstorming/scripts/helper.js | 123 ++++- .../skills/brainstorming/scripts/server.cjs | 429 ++++++++++++++++-- .../brainstorming/scripts/start-server.sh | 79 +++- .../brainstorming/scripts/stop-server.sh | 68 ++- .../spec-document-reviewer-prompt.md | 2 +- .../skills/brainstorming/visual-companion.md | 55 ++- .../dispatching-parallel-agents/SKILL.md | 15 +- .../experts/skills/executing-plans/SKILL.md | 4 +- .../finishing-a-development-branch/SKILL.md | 14 +- .../skills/receiving-code-review/SKILL.md | 4 +- .../skills/requesting-code-review/SKILL.md | 4 +- .../requesting-code-review/code-reviewer.md | 26 +- .../subagent-driven-development/SKILL.md | 271 ++++++++--- .../code-quality-reviewer-prompt.md | 25 - .../implementer-prompt.md | 34 +- .../scripts/review-package | 47 ++ .../scripts/task-brief | 42 ++ .../spec-reviewer-prompt.md | 61 --- .../task-reviewer-prompt.md | 188 ++++++++ .../skills/systematic-debugging/SKILL.md | 2 +- .../skills/test-driven-development/SKILL.md | 2 +- .../skills/using-git-worktrees/SKILL.md | 31 +- .../experts/skills/using-superpowers/SKILL.md | 34 +- .../references/antigravity-tools.md | 96 ++++ .../references/claude-code-tools.md | 50 ++ .../references/codex-tools.md | 37 +- .../references/copilot-tools.md | 47 +- .../references/gemini-tools.md | 76 ++-- .../using-superpowers/references/pi-tools.md | 28 ++ .../experts/skills/writing-plans/SKILL.md | 22 + .../plan-document-reviewer-prompt.md | 2 +- .../experts/skills/writing-skills/SKILL.md | 70 ++- .../anthropic-best-practices.md | 176 +++---- .../writing-skills/persuasion-principles.md | 6 +- 36 files changed, 1709 insertions(+), 527 deletions(-) delete mode 100644 
src-tauri/experts/skills/subagent-driven-development/code-quality-reviewer-prompt.md create mode 100755 src-tauri/experts/skills/subagent-driven-development/scripts/review-package create mode 100755 src-tauri/experts/skills/subagent-driven-development/scripts/task-brief delete mode 100644 src-tauri/experts/skills/subagent-driven-development/spec-reviewer-prompt.md create mode 100644 src-tauri/experts/skills/subagent-driven-development/task-reviewer-prompt.md create mode 100644 src-tauri/experts/skills/using-superpowers/references/antigravity-tools.md create mode 100644 src-tauri/experts/skills/using-superpowers/references/claude-code-tools.md create mode 100644 src-tauri/experts/skills/using-superpowers/references/pi-tools.md diff --git a/src-tauri/experts/skills/brainstorming/SKILL.md b/src-tauri/experts/skills/brainstorming/SKILL.md index 06cd0a21..b0d52b25 100644 --- a/src-tauri/experts/skills/brainstorming/SKILL.md +++ b/src-tauri/experts/skills/brainstorming/SKILL.md @@ -22,7 +22,7 @@ Every project goes through this process. A todo list, a single-function utility, You MUST create a task for each of these items and complete them in order: 1. **Explore project context** — check files, docs, recent commits -2. **Offer visual companion** (if topic will involve visual questions) — this is its own message, not combined with a clarifying question. See the Visual Companion section below. +2. **Offer the visual companion just-in-time** — NOT upfront. The first time a question would genuinely be clearer shown than described, offer it then (its own message); on approval its browser tab opens for you. If no visual question ever arises, never offer it. See the Visual Companion section below. 3. **Ask clarifying questions** — one at a time, understand purpose/constraints/success criteria 4. **Propose 2-3 approaches** — with trade-offs and your recommendation 5. 
**Present design** — in sections scaled to their complexity, get user approval after each section @@ -36,8 +36,6 @@ You MUST create a task for each of these items and complete them in order: ```dot digraph brainstorming { "Explore project context" [shape=box]; - "Visual questions ahead?" [shape=diamond]; - "Offer Visual Companion\n(own message, no other content)" [shape=box]; "Ask clarifying questions" [shape=box]; "Propose 2-3 approaches" [shape=box]; "Present design sections" [shape=box]; @@ -47,10 +45,7 @@ digraph brainstorming { "User reviews spec?" [shape=diamond]; "Invoke writing-plans skill" [shape=doublecircle]; - "Explore project context" -> "Visual questions ahead?"; - "Visual questions ahead?" -> "Offer Visual Companion\n(own message, no other content)" [label="yes"]; - "Visual questions ahead?" -> "Ask clarifying questions" [label="no"]; - "Offer Visual Companion\n(own message, no other content)" -> "Ask clarifying questions"; + "Explore project context" -> "Ask clarifying questions"; "Ask clarifying questions" -> "Propose 2-3 approaches"; "Propose 2-3 approaches" -> "Present design sections"; "Present design sections" -> "User approves design?"; @@ -148,10 +143,10 @@ Wait for the user's response. If they request changes, make them and re-run the A browser-based companion for showing mockups, diagrams, and visual options during brainstorming. Available as a tool — not a mode. Accepting the companion means it's available for questions that benefit from visual treatment; it does NOT mean every question goes through the browser. -**Offering the companion:** When you anticipate that upcoming questions will involve visual content (mockups, layouts, diagrams), offer it once for consent: -> "Some of what we're working on might be easier to explain if I can show it to you in a web browser. I can put together mockups, diagrams, comparisons, and other visuals as we go. This feature is still new and can be token-intensive. Want to try it? 
(Requires opening a local URL)" +**Offering the companion (just-in-time):** Do NOT offer it upfront. Wait until a question would genuinely be clearer shown than told — a real mockup / layout / diagram question, not merely a UI *topic*. The first time that happens, offer it then, as its own message: +> "This next part might be easier if I show you — I can put together mockups, diagrams, and comparisons in a browser tab as we go. It's still new and can be token-intensive. Want me to? I'll open it for you." -**This offer MUST be its own message.** Do not combine it with clarifying questions, context summaries, or any other content. The message should contain ONLY the offer above and nothing else. Wait for the user's response before continuing. If they decline, proceed with text-only brainstorming. +**This offer MUST be its own message.** Only the offer — no clarifying question, summary, or other content. Wait for the user's response. If they accept, start the server with `--open` so their browser opens to the first screen automatically. If they decline, continue text-only and don't offer again unless they raise it. **Per-question decision:** Even after the user accepts, decide FOR EACH QUESTION whether to use the browser or the terminal. The test: **would the user understand this better by seeing it than reading it?** diff --git a/src-tauri/experts/skills/brainstorming/scripts/frame-template.html b/src-tauri/experts/skills/brainstorming/scripts/frame-template.html index dcfe0181..f540bb8a 100644 --- a/src-tauri/experts/skills/brainstorming/scripts/frame-template.html +++ b/src-tauri/experts/skills/brainstorming/scripts/frame-template.html @@ -9,11 +9,11 @@ * * This template provides a consistent frame with: * - OS-aware light/dark theming - * - Fixed header and selection indicator bar + * - Header branding and connection status * - Scrollable main content area * - CSS helpers for common UI patterns * - * Content is injected via placeholder comment in #claude-content. 
+ * Content is injected via placeholder comment in #frame-content. */ * { box-sizing: border-box; margin: 0; padding: 0; } @@ -63,34 +63,37 @@ } /* ===== FRAME STRUCTURE ===== */ - .header { - background: var(--bg-secondary); - padding: 0.5rem 1.5rem; - display: flex; - justify-content: space-between; - align-items: center; - border-bottom: 1px solid var(--border); - flex-shrink: 0; + .brand { display: flex; align-items: center; min-width: 0; overflow: hidden; color: var(--text-secondary); line-height: 1; } + .brand a { color: inherit; text-decoration: none; display: flex; align-items: center; gap: 0.5rem; min-width: 0; max-width: 100%; line-height: 1; } + .brand-copy { display: block; min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; line-height: 1; transform: translateY(-1px); } + .brand-logo { display: block; height: 1em; width: auto; max-width: 180px; flex-shrink: 0; filter: invert(1); } + @media (prefers-color-scheme: dark) { + .brand-logo { filter: none; } } - .header h1 { font-size: 0.85rem; font-weight: 500; color: var(--text-secondary); } - .header .status { font-size: 0.7rem; color: var(--success); display: flex; align-items: center; gap: 0.4rem; } - .header .status::before { content: ''; width: 6px; height: 6px; background: var(--success); border-radius: 50%; } + .status { font-size: 0.7rem; color: var(--status-color, var(--success)); display: flex; align-items: center; gap: 0.4rem; justify-self: end; white-space: nowrap; line-height: 1; } + .status::before { content: ''; width: 6px; height: 6px; background: var(--status-color, var(--success)); border-radius: 50%; } .main { flex: 1; overflow-y: auto; } - #claude-content { padding: 2rem; min-height: 100%; } + #frame-content { padding: 2rem; min-height: 100%; } - .indicator-bar { + .header { background: var(--bg-secondary); - border-top: 1px solid var(--border); + border-bottom: 1px solid var(--border); padding: 0.5rem 1.5rem; flex-shrink: 0; - text-align: center; + display: 
grid; + grid-template-columns: minmax(0, 1fr) auto; + align-items: center; + gap: 1rem; + min-height: 42px; } - .indicator-bar span { + .header .brand { justify-self: start; width: 100%; font-size: 0.75rem; line-height: 1; } + .header .status { grid-column: 2; line-height: 1; } + .header span { font-size: 0.75rem; color: var(--text-secondary); } - .indicator-bar .selected-text { + .header .selected-text { color: var(--accent); font-weight: 500; } @@ -196,19 +199,15 @@
-

Superpowers Brainstorming

-
Connected
+ +
Connecting…
-
+
-
- Click an option above, then return to the terminal -
- diff --git a/src-tauri/experts/skills/brainstorming/scripts/helper.js b/src-tauri/experts/skills/brainstorming/scripts/helper.js index 111f97f5..e11d2648 100644 --- a/src-tauri/experts/skills/brainstorming/scripts/helper.js +++ b/src-tauri/experts/skills/brainstorming/scripts/helper.js @@ -1,26 +1,120 @@ (function() { - const WS_URL = 'ws://' + window.location.host; + const MIN_RECONNECT_MS = 500; + const MAX_RECONNECT_MS = 30000; + const TOMBSTONE_AFTER_MS = 15000; // show the "paused" overlay after this long disconnected + + // Pure: next backoff delay (doubles, capped). Exported for unit tests. + function nextReconnectDelay(current, max) { + return Math.min(current * 2, max); + } + if (typeof module !== 'undefined' && module.exports) { + module.exports = { nextReconnectDelay, MIN_RECONNECT_MS, MAX_RECONNECT_MS, TOMBSTONE_AFTER_MS }; + } + + // Everything below is browser-only; bail out when loaded in Node (tests). + if (typeof window === 'undefined') return; + let ws = null; let eventQueue = []; + let reconnectDelay = MIN_RECONNECT_MS; + let reconnectTimer = null; + let disconnectedSince = null; + let everConnected = false; + let tombstoneShown = false; + + function sessionKey() { + try { + return window.sessionStorage && window.sessionStorage.getItem('brainstorm-session-key'); + } catch (e) {} + return null; + } + + function websocketUrl() { + const key = sessionKey(); + return 'ws://' + window.location.host + (key ? '/?key=' + encodeURIComponent(key) : ''); + } + + function reloadAfterRecovery() { + const key = sessionKey(); + if (key) { + window.location.replace('/?key=' + encodeURIComponent(key)); + } else { + window.location.reload(); + } + } + + // Reflect connection state in the frame's status pill (absent on full-doc screens). 
+ function setStatus(state) { + const el = document.querySelector('.status'); + if (!el) return; + const map = { + connecting: ['Connecting…', 'var(--text-tertiary)'], + connected: ['Connected', 'var(--success)'], + reconnecting: ['Reconnecting…', 'var(--warning)'], + disconnected: ['Disconnected', 'var(--error)'] + }; + const [text, color] = map[state] || map.disconnected; + el.textContent = text; + el.style.setProperty('--status-color', color); + } + + // Self-styled so it works on framed and full-document screens alike. + function showTombstone() { + if (tombstoneShown) return; + tombstoneShown = true; + const el = document.createElement('div'); + el.id = 'bs-tombstone'; + el.style.cssText = 'position:fixed;inset:0;z-index:99999;display:flex;' + + 'align-items:center;justify-content:center;padding:2rem;text-align:center;' + + 'background:rgba(20,20,22,0.92);color:#f5f5f7;font-family:system-ui,sans-serif'; + el.innerHTML = '
' + + '

Companion paused

' + + '

This brainstorm companion has stopped. ' + + 'Ask your coding agent to bring it back — this page reconnects automatically.

'; + if (document.body) document.body.appendChild(el); + } function connect() { - ws = new WebSocket(WS_URL); + if (reconnectTimer) { clearTimeout(reconnectTimer); reconnectTimer = null; } + setStatus(everConnected ? 'reconnecting' : 'connecting'); + ws = new WebSocket(websocketUrl()); ws.onopen = () => { + const recovered = tombstoneShown; + everConnected = true; + disconnectedSince = null; + reconnectDelay = MIN_RECONNECT_MS; + tombstoneShown = false; + setStatus('connected'); eventQueue.forEach(e => ws.send(JSON.stringify(e))); eventQueue = []; + // Recovered from a tombstoned outage (e.g. the server restarted on the same + // port) — reload through the keyed bootstrap when possible so the cookie is + // refreshed before the visible URL returns to bare /. + if (recovered) reloadAfterRecovery(); }; ws.onmessage = (msg) => { - const data = JSON.parse(msg.data); - if (data.type === 'reload') { - window.location.reload(); - } + let data; + try { data = JSON.parse(msg.data); } catch (e) { return; } + if (data.type === 'reload') window.location.reload(); }; ws.onclose = () => { - setTimeout(connect, 1000); + ws = null; + if (disconnectedSince === null) disconnectedSince = Date.now(); + if (Date.now() - disconnectedSince >= TOMBSTONE_AFTER_MS) { + setStatus('disconnected'); + showTombstone(); + } else { + setStatus('reconnecting'); + } + reconnectTimer = setTimeout(connect, reconnectDelay); + reconnectDelay = nextReconnectDelay(reconnectDelay, MAX_RECONNECT_MS); }; + + // Let onclose own reconnection so we don't schedule it twice. + ws.onerror = () => { try { ws.close(); } catch (e) {} }; } function sendEvent(event) { @@ -44,21 +138,6 @@ id: target.id || null }); - // Update indicator bar (defer so toggleSelect runs first) - setTimeout(() => { - const indicator = document.getElementById('indicator-text'); - if (!indicator) return; - const container = target.closest('.options') || target.closest('.cards'); - const selected = container ? 
container.querySelectorAll('.selected') : []; - if (selected.length === 0) { - indicator.textContent = 'Click an option above, then return to the terminal'; - } else if (selected.length === 1) { - const label = selected[0].querySelector('h3, .content h3, .card-body h3')?.textContent?.trim() || selected[0].dataset.choice; - indicator.innerHTML = '' + label + ' selected — return to terminal to continue'; - } else { - indicator.innerHTML = '' + selected.length + ' selected — return to terminal to continue'; - } - }, 0); }); // Frame UI: selection tracking diff --git a/src-tauri/experts/skills/brainstorming/scripts/server.cjs b/src-tauri/experts/skills/brainstorming/scripts/server.cjs index 562c17f8..a828b35a 100644 --- a/src-tauri/experts/skills/brainstorming/scripts/server.cjs +++ b/src-tauri/experts/skills/brainstorming/scripts/server.cjs @@ -7,6 +7,7 @@ const path = require('path'); const OPCODES = { TEXT: 0x01, CLOSE: 0x08, PING: 0x09, PONG: 0x0A }; const WS_MAGIC = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'; +const MAX_FRAME_PAYLOAD_BYTES = 10 * 1024 * 1024; function computeAcceptKey(clientKey) { return crypto.createHash('sha1').update(clientKey + WS_MAGIC).digest('base64'); @@ -53,10 +54,18 @@ function decodeFrame(buffer) { offset = 4; } else if (payloadLen === 127) { if (buffer.length < 10) return null; - payloadLen = Number(buffer.readBigUInt64BE(2)); + const extendedLen = buffer.readBigUInt64BE(2); + if (extendedLen > BigInt(MAX_FRAME_PAYLOAD_BYTES)) { + throw new Error('WebSocket frame payload exceeds maximum allowed size'); + } + payloadLen = Number(extendedLen); offset = 10; } + if (payloadLen > MAX_FRAME_PAYLOAD_BYTES) { + throw new Error('WebSocket frame payload exceeds maximum allowed size'); + } + const maskOffset = offset; const dataOffset = offset + 4; const totalLen = dataOffset + payloadLen; @@ -73,14 +82,74 @@ function decodeFrame(buffer) { // ========== Configuration ========== -const PORT = process.env.BRAINSTORM_PORT || (49152 + 
Math.floor(Math.random() * 16383)); +const PORT_FILE = process.env.BRAINSTORM_PORT_FILE || null; +const randomPort = () => 49152 + Math.floor(Math.random() * 16383); +// Prefer an explicit port, else the port this session last bound (so a restart +// reuses it and an already-open browser tab reconnects), else a random high port. +function preferredPort() { + if (process.env.BRAINSTORM_PORT) return Number(process.env.BRAINSTORM_PORT); + if (PORT_FILE) { + try { + const p = Number(fs.readFileSync(PORT_FILE, 'utf-8').trim()); + if (Number.isInteger(p) && p > 1023 && p < 65536) return p; + } catch (e) { /* no prior port recorded */ } + } + return randomPort(); +} +let PORT = preferredPort(); const HOST = process.env.BRAINSTORM_HOST || '127.0.0.1'; const URL_HOST = process.env.BRAINSTORM_URL_HOST || (HOST === '127.0.0.1' ? 'localhost' : HOST); const SESSION_DIR = process.env.BRAINSTORM_DIR || '/tmp/brainstorm'; const CONTENT_DIR = path.join(SESSION_DIR, 'content'); const STATE_DIR = path.join(SESSION_DIR, 'state'); +const SUPERPOWERS_VERSION = readSuperpowersVersion(); +const SUPERPOWERS_BRAND_IMAGE_URL = 'https://primeradiant.com/brand/superpowers-visual-brainstorming-logo.png'; +const TELEMETRY_DISABLE_ENV_VARS = [ + 'SUPERPOWERS_DISABLE_TELEMETRY', + 'DISABLE_TELEMETRY', + 'CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC' +]; +const SUPERPOWERS_TELEMETRY_DISABLED = TELEMETRY_DISABLE_ENV_VARS.some(name => isTruthyEnv(process.env[name])); let ownerPid = process.env.BRAINSTORM_OWNER_PID ? Number(process.env.BRAINSTORM_OWNER_PID) : null; +// Per-session secret key. The companion is reachable by any local browser tab +// and, when bound to a non-loopback host, by any host that can route to it. +// The key authenticates the real client uniformly across loopback, tunnel, and +// remote binds — and defeats DNS rebinding — where a Host/Origin allowlist +// cannot. 
It rides the served URL as ?key= and is mirrored into a cookie on +// first load so same-origin subresources and the WebSocket carry it for free. +// Persisted alongside the port (BRAINSTORM_TOKEN_FILE) so a restart keeps the +// same key and an already-open tab's cookie still validates. +const TOKEN_FILE = process.env.BRAINSTORM_TOKEN_FILE || null; +function generateToken() { + return crypto.randomBytes(32).toString('hex'); +} + +function chmodOwnerOnly(file) { + try { fs.chmodSync(file, 0o600); } catch (e) { /* best effort */ } +} + +function initialToken() { + if (process.env.BRAINSTORM_TOKEN) { + return { value: process.env.BRAINSTORM_TOKEN, source: 'env' }; + } + if (TOKEN_FILE) { + try { + const t = fs.readFileSync(TOKEN_FILE, 'utf-8').trim(); + if (/^[0-9a-f]{32,}$/i.test(t)) { + chmodOwnerOnly(TOKEN_FILE); + return { value: t, source: 'file' }; + } + } catch (e) { /* no prior token recorded */ } + } + return { value: generateToken(), source: 'generated' }; +} + +const tokenInfo = initialToken(); +let TOKEN = tokenInfo.value; +let tokenSource = tokenInfo.source; +let COOKIE_NAME = 'brainstorm-key-' + PORT; // refined to the actual bound port in onListen + const MIME_TYPES = { '.html': 'text/html', '.css': 'text/css', '.js': 'application/javascript', '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg', @@ -89,14 +158,46 @@ const MIME_TYPES = { // ========== Templates and Constants ========== -const WAITING_PAGE = ` +function waitingPage() { + return renderBranding(` Brainstorm Companion + + +

Brainstorm Companion

+

Waiting for the agent to push a screen...

`); +} + +const FORBIDDEN_PAGE = ` + +Session key required +h1 { color: #333; } p { color: #666; } code { background: #f0f0f0; padding: 0.1em 0.3em; border-radius: 4px; } -

Brainstorm Companion

-

Waiting for the agent to push a screen...

`; +

Session key required

+

This page needs the full URL your coding agent gave you, including the +?key=… part. Copy the complete URL and open it again.

`; + +function bootstrapPage(key) { + const jsonKey = JSON.stringify(String(key)); + return ` + +Opening Brainstorm Companion + + + +`; +} const frameTemplate = fs.readFileSync(path.join(__dirname, 'frame-template.html'), 'utf-8'); const helperScript = fs.readFileSync(path.join(__dirname, 'helper.js'), 'utf-8'); @@ -104,35 +205,209 @@ const helperInjection = ''; // ========== Helper Functions ========== +function readSuperpowersVersion() { + const root = path.join(__dirname, '../../..'); + const manifests = [ + path.join(root, 'package.json'), + path.join(root, '.codex-plugin/plugin.json') + ]; + + for (const manifest of manifests) { + try { + const data = JSON.parse(fs.readFileSync(manifest, 'utf-8')); + if (data.version) return String(data.version); + } catch (e) { + // Packaged Codex plugins omit package.json; try the next manifest. + } + } + + return 'unknown'; +} + +function isTruthyEnv(value) { + if (!value) return false; + const normalized = String(value).trim().toLowerCase(); + if (!normalized) return false; + return !['0', 'false', 'no', 'off'].includes(normalized); +} + +function escapeHtmlText(value) { + return String(value) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} + +function brandMarkup() { + const version = escapeHtmlText(SUPERPOWERS_VERSION); + const text = SUPERPOWERS_TELEMETRY_DISABLED + ? 'Prime Radiant Superpowers v' + version + : 'Superpowers v' + version; + const logo = SUPERPOWERS_TELEMETRY_DISABLED + ? 
'' + : ''; + + return ''; +} + +function renderBranding(html) { + return html.split('').join(brandMarkup()); +} + function isFullDocument(html) { const trimmed = html.trimStart().toLowerCase(); return trimmed.startsWith('', content); + return renderBranding(frameTemplate).replace('', content); } function getNewestScreen() { const files = fs.readdirSync(CONTENT_DIR) - .filter(f => f.endsWith('.html')) + .filter(f => !f.startsWith('.') && f.endsWith('.html')) .map(f => { const fp = path.join(CONTENT_DIR, f); + if (!isRegularFileInsideContentDir(fp)) return null; return { path: fp, mtime: fs.statSync(fp).mtime.getTime() }; }) + .filter(Boolean) .sort((a, b) => b.mtime - a.mtime); return files.length > 0 ? files[0].path : null; } +function urlHostForHttp(host) { + const h = String(host); + if (h.startsWith('[') && h.endsWith(']')) return h; + return h.includes(':') ? '[' + h + ']' : h; +} + +function companionUrl() { + return 'http://' + urlHostForHttp(URL_HOST) + ':' + PORT + '/?key=' + TOKEN; +} + +function browserLauncherForPlatform(url, { + platform = process.platform, + osRelease = require('os').release(), + env = process.env +} = {}) { + const isWSL = platform === 'linux' && /microsoft/i.test(osRelease); + if (platform === 'darwin') return { bin: 'open', args: [url] }; + if (platform === 'win32' || isWSL) { + return { bin: 'rundll32.exe', args: ['url.dll,FileProtocolHandler', url] }; + } + if (env.DISPLAY || env.WAYLAND_DISPLAY) return { bin: 'xdg-open', args: [url] }; + return null; +} + +function isRegularFileInsideContentDir(filePath) { + let stat, realContentDir, realFilePath; + try { + stat = fs.lstatSync(filePath); + if (stat.isSymbolicLink()) return false; + if (!stat.isFile()) return false; + if (stat.nlink !== 1) return false; + realContentDir = fs.realpathSync(CONTENT_DIR); + realFilePath = fs.realpathSync(filePath); + } catch (e) { + return false; + } + return realFilePath.startsWith(realContentDir + path.sep); +} + +// ========== Authentication 
========== + +function timingSafeEqualStr(a, b) { + const ab = Buffer.from(String(a)); + const bb = Buffer.from(String(b)); + if (ab.length !== bb.length) return false; + return crypto.timingSafeEqual(ab, bb); +} + +function parseCookies(header) { + const out = {}; + if (!header) return out; + for (const part of header.split(';')) { + const eq = part.indexOf('='); + if (eq < 0) continue; + out[part.slice(0, eq).trim()] = part.slice(eq + 1).trim(); + } + return out; +} + +// A request is authorized if it carries the session key as ?key= or as the +// session cookie. Both are compared in constant time. +function isAuthorized(req) { + const q = req.url.indexOf('?'); + if (q >= 0) { + const params = new URLSearchParams(req.url.slice(q + 1)); + if (params.has('key')) { + const key = params.get('key'); + return Boolean(key && timingSafeEqualStr(key, TOKEN)); + } + } + const cookie = parseCookies(req.headers['cookie'])[COOKIE_NAME]; + if (cookie && timingSafeEqualStr(cookie, TOKEN)) return true; + return false; +} + +function pathnameOf(url) { + const q = url.indexOf('?'); + return q >= 0 ? 
url.slice(0, q) : url; +} + +function queryKey(url) { + const q = url.indexOf('?'); + if (q < 0) return null; + return new URLSearchParams(url.slice(q + 1)).get('key'); +} + +function securityHeaders(headers = {}) { + return { + 'Referrer-Policy': 'no-referrer', + 'Cache-Control': 'no-store', + 'X-Frame-Options': 'DENY', + 'Content-Security-Policy': "frame-ancestors 'none'", + 'Cross-Origin-Resource-Policy': 'same-origin', + ...headers + }; +} + +function isAllowedWebSocketOrigin(req) { + const origin = req.headers.origin; + if (!origin) return true; + const host = req.headers.host; + if (!host) return false; + return origin === 'http://' + host; +} + // ========== HTTP Request Handler ========== function handleRequest(req, res) { - touchActivity(); - if (req.method === 'GET' && req.url === '/') { + if (!isAuthorized(req)) { + res.writeHead(403, securityHeaders({ 'Content-Type': 'text/html; charset=utf-8' })); + res.end(FORBIDDEN_PAGE); + return; + } + touchActivity(); // only authorized requests count as activity + + // Mirror the key into a cookie so same-origin subresources (/files/*) can + // authenticate after bootstrap. HttpOnly keeps it away from page scripts; the + // WebSocket Origin check below is what blocks cross-origin localhost injection. + res.setHeader('Set-Cookie', + COOKIE_NAME + '=' + TOKEN + '; HttpOnly; SameSite=Strict; Path=/'); + + const pathname = pathnameOf(req.url); + const keyFromQuery = queryKey(req.url); + if (req.method === 'GET' && pathname === '/' && keyFromQuery && timingSafeEqualStr(keyFromQuery, TOKEN)) { + res.writeHead(200, securityHeaders({ 'Content-Type': 'text/html; charset=utf-8' })); + res.end(bootstrapPage(keyFromQuery)); + } else if (req.method === 'GET' && pathname === '/') { const screenFile = getNewestScreen(); let html = screenFile ? (raw => isFullDocument(raw) ? 
raw : wrapInFrame(raw))(fs.readFileSync(screenFile, 'utf-8')) - : WAITING_PAGE; + : waitingPage(); if (html.includes('')) { html = html.replace('', helperInjection + '\n'); @@ -140,22 +415,24 @@ function handleRequest(req, res) { html += helperInjection; } - res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' }); + res.writeHead(200, securityHeaders({ 'Content-Type': 'text/html; charset=utf-8' })); res.end(html); - } else if (req.method === 'GET' && req.url.startsWith('/files/')) { - const fileName = req.url.slice(7); - const filePath = path.join(CONTENT_DIR, path.basename(fileName)); - if (!fs.existsSync(filePath)) { - res.writeHead(404); + } else if (req.method === 'GET' && pathname.startsWith('/files/')) { + const fileName = path.basename(pathname.slice(7)); + const filePath = path.join(CONTENT_DIR, fileName); + // Reject empty/dotfile names and anything that isn't a regular file — + // `/files/` would otherwise resolve to CONTENT_DIR and crash readFileSync (EISDIR). + if (!fileName || fileName.startsWith('.') || !isRegularFileInsideContentDir(filePath)) { + res.writeHead(404, securityHeaders()); res.end('Not found'); return; } const ext = path.extname(filePath).toLowerCase(); const contentType = MIME_TYPES[ext] || 'application/octet-stream'; - res.writeHead(200, { 'Content-Type': contentType }); + res.writeHead(200, securityHeaders({ 'Content-Type': contentType })); res.end(fs.readFileSync(filePath)); } else { - res.writeHead(404); + res.writeHead(404, securityHeaders()); res.end('Not found'); } } @@ -165,6 +442,8 @@ function handleRequest(req, res) { const clients = new Set(); function handleUpgrade(req, socket) { + if (!isAuthorized(req) || !isAllowedWebSocketOrigin(req)) { socket.destroy(); return; } + const key = req.headers['sec-websocket-key']; if (!key) { socket.destroy(); return; } @@ -231,7 +510,7 @@ function handleMessage(text) { } touchActivity(); console.log(JSON.stringify({ source: 'user-event', ...event })); - if (event.choice) { + if 
(event && event.choice) { const eventsFile = path.join(STATE_DIR, 'events'); fs.appendFileSync(eventsFile, JSON.stringify(event) + '\n'); } @@ -244,9 +523,44 @@ function broadcast(msg) { } } +// Best-effort: open the user's browser the first time a screen is actually ready +// to show. Skips when disabled, on a non-loopback (remote) bind, or when a +// browser is already connected. Override the launcher with BRAINSTORM_OPEN_CMD. +let browserOpened = false; +function maybeOpenBrowser() { + if (browserOpened) return; + browserOpened = true; + if (!process.env.BRAINSTORM_OPEN) return; // opt-in: only after the user approves the companion + if (HOST !== '127.0.0.1' && HOST !== 'localhost') return; + if (clients.size > 0) return; // the user already opened it + const url = companionUrl(); // must carry the key or the gate 403s it + const cp = require('child_process'); + // Operator-provided launcher: run as given (this env var is trusted operator input). + if (process.env.BRAINSTORM_OPEN_CMD) { + try { cp.exec(process.env.BRAINSTORM_OPEN_CMD + ' ' + JSON.stringify(url), () => {}); } catch (e) { /* best effort */ } + return; + } + // Platform launchers: pass the URL as an argv element via execFile (no shell), + // so a url-host containing shell metacharacters can't inject a command. + const launcher = browserLauncherForPlatform(url); + if (!launcher) return; // headless: nothing to open + try { cp.execFile(launcher.bin, launcher.args, () => {}); } catch (e) { /* best effort */ } +} + // ========== Activity Tracking ========== -const IDLE_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes +// Idle timeout: shut down after this long with no activity. Default 4 hours; +// override with BRAINSTORM_IDLE_TIMEOUT_MS (start-server.sh: --idle-timeout-minutes). +const IDLE_TIMEOUT_MS = (() => { + const ms = Number(process.env.BRAINSTORM_IDLE_TIMEOUT_MS); + return Number.isFinite(ms) && ms > 0 ? ms : 4 * 60 * 60 * 1000; +})(); +// How often the watchdog checks for owner-death / idleness. 
Configurable mainly +// so tests can run fast; production default is 60s. +const LIFECYCLE_CHECK_MS = (() => { + const ms = Number(process.env.BRAINSTORM_LIFECYCLE_CHECK_MS); + return Number.isFinite(ms) && ms > 0 ? ms : 60 * 1000; +})(); let lastActivity = Date.now(); function touchActivity() { @@ -267,14 +581,14 @@ function startServer() { // macOS fs.watch reports 'rename' for both new files and overwrites, // so we can't rely on eventType alone. const knownFiles = new Set( - fs.readdirSync(CONTENT_DIR).filter(f => f.endsWith('.html')) + fs.readdirSync(CONTENT_DIR).filter(f => !f.startsWith('.') && f.endsWith('.html')) ); const server = http.createServer(handleRequest); server.on('upgrade', handleUpgrade); const watcher = fs.watch(CONTENT_DIR, (eventType, filename) => { - if (!filename || !filename.endsWith('.html')) return; + if (!filename || filename.startsWith('.') || !filename.endsWith('.html')) return; if (debounceTimers.has(filename)) clearTimeout(debounceTimers.get(filename)); debounceTimers.set(filename, setTimeout(() => { @@ -289,6 +603,7 @@ function startServer() { const eventsFile = path.join(STATE_DIR, 'events'); if (fs.existsSync(eventsFile)) fs.unlinkSync(eventsFile); console.log(JSON.stringify({ type: 'screen-added', file: filePath })); + maybeOpenBrowser(); } else { console.log(JSON.stringify({ type: 'screen-updated', file: filePath })); } @@ -308,6 +623,11 @@ function startServer() { ); watcher.close(); clearInterval(lifecycleCheck); + // Close any upgraded WebSocket sockets so server.close() can complete and + // the process actually exits instead of lingering on an open connection. 
+ for (const socket of clients) { + try { socket.destroy(); } catch (e) { /* already gone */ } + } server.close(() => process.exit(0)); } @@ -316,11 +636,11 @@ function startServer() { try { process.kill(ownerPid, 0); return true; } catch (e) { return e.code === 'EPERM'; } } - // Check every 60s: exit if owner process died or idle for 30 minutes + // Periodically exit if the owner process died or we've been idle too long. const lifecycleCheck = setInterval(() => { if (!ownerAlive()) shutdown('owner process exited'); else if (Date.now() - lastActivity > IDLE_TIMEOUT_MS) shutdown('idle timeout'); - }, 60 * 1000); + }, LIFECYCLE_CHECK_MS); lifecycleCheck.unref(); // Validate owner PID at startup. If it's already dead, the PID resolution @@ -336,19 +656,68 @@ function startServer() { } } - server.listen(PORT, HOST, () => { + // If the preferred port is already taken (e.g. a previous server is still + // alive), fall back to a random port once instead of failing. + let triedFallback = false; + + function onListen() { + // Cookie name keys on the ACTUAL bound port (may differ from the preferred + // one after an EADDRINUSE fallback) so it can't collide with another server's + // cookie in the shared localhost jar. + COOKIE_NAME = 'brainstorm-key-' + PORT; + // Record the bound port AND token so the next restart of this session reuses + // them — but ONLY when we got our preferred port. On a fallback we bound a + // *different* port because someone else holds the preferred one; persisting + // would overwrite the shared files and strand that other session's open tab. 
+ if (PORT_FILE && !triedFallback) { + try { fs.writeFileSync(PORT_FILE, String(PORT)); } catch (e) { /* best effort */ } + if (TOKEN_FILE) { + try { + fs.writeFileSync(TOKEN_FILE, TOKEN, { mode: 0o600 }); + chmodOwnerOnly(TOKEN_FILE); + } catch (e) { /* best effort */ } + } + } const info = JSON.stringify({ type: 'server-started', port: Number(PORT), host: HOST, - url_host: URL_HOST, url: 'http://' + URL_HOST + ':' + PORT, - screen_dir: CONTENT_DIR, state_dir: STATE_DIR + url_host: URL_HOST, url: companionUrl(), + screen_dir: CONTENT_DIR, state_dir: STATE_DIR, idle_timeout_ms: IDLE_TIMEOUT_MS }); console.log(info); - fs.writeFileSync(path.join(STATE_DIR, 'server-info'), info + '\n'); + // server-info embeds the key — keep it owner-only. + fs.writeFileSync(path.join(STATE_DIR, 'server-info'), info + '\n', { mode: 0o600 }); + } + + server.on('error', (err) => { + if (err.code === 'EADDRINUSE' && !triedFallback) { + if (tokenSource === 'env') { + console.error('Server failed to bind: preferred port is in use and BRAINSTORM_TOKEN is set; refusing fallback with explicit token'); + process.exit(1); + } + triedFallback = true; + PORT = randomPort(); + if (tokenSource === 'file') { + TOKEN = generateToken(); + tokenSource = 'generated-fallback'; + } + server.listen(PORT, HOST, onListen); + } else { + console.error('Server failed to bind:', err.message); + process.exit(1); + } }); + server.listen(PORT, HOST, onListen); } if (require.main === module) { startServer(); } -module.exports = { computeAcceptKey, encodeFrame, decodeFrame, OPCODES }; +module.exports = { + computeAcceptKey, + encodeFrame, + decodeFrame, + browserLauncherForPlatform, + OPCODES, + MAX_FRAME_PAYLOAD_BYTES +}; diff --git a/src-tauri/experts/skills/brainstorming/scripts/start-server.sh b/src-tauri/experts/skills/brainstorming/scripts/start-server.sh index 9ef6dcb9..016a8e48 100755 --- a/src-tauri/experts/skills/brainstorming/scripts/start-server.sh +++ 
b/src-tauri/experts/skills/brainstorming/scripts/start-server.sh @@ -11,6 +11,9 @@ # --host Host/interface to bind (default: 127.0.0.1). # Use 0.0.0.0 in remote/containerized environments. # --url-host Hostname shown in returned URL JSON. +# --idle-timeout-minutes Shut down after n minutes idle (default 240 = 4h). +# --open Auto-open the browser on the first screen (use only +# after the user approves the visual companion). # --foreground Run server in the current terminal (no backgrounding). # --background Force background mode (overrides Codex auto-foreground). @@ -22,6 +25,7 @@ FOREGROUND="false" FORCE_BACKGROUND="false" BIND_HOST="127.0.0.1" URL_HOST="" +IDLE_TIMEOUT_MINUTES="" while [[ $# -gt 0 ]]; do case "$1" in --project-dir) @@ -36,6 +40,14 @@ while [[ $# -gt 0 ]]; do URL_HOST="$2" shift 2 ;; + --idle-timeout-minutes) + IDLE_TIMEOUT_MINUTES="$2" + shift 2 + ;; + --open) + export BRAINSTORM_OPEN=1 + shift + ;; --foreground|--no-daemon) FOREGROUND="true" shift @@ -59,6 +71,29 @@ if [[ -z "$URL_HOST" ]]; then fi fi +if [[ -n "$IDLE_TIMEOUT_MINUTES" ]]; then + if ! [[ "$IDLE_TIMEOUT_MINUTES" =~ ^[0-9]+$ ]] || [[ "$IDLE_TIMEOUT_MINUTES" -lt 1 ]]; then + echo "{\"error\": \"--idle-timeout-minutes must be a positive integer\"}" + exit 1 + fi + export BRAINSTORM_IDLE_TIMEOUT_MS=$(( IDLE_TIMEOUT_MINUTES * 60 * 1000 )) +fi + +is_windows_like_shell() { + case "${OSTYPE:-}" in + msys*|cygwin*|mingw*) return 0 ;; + esac + if [[ -n "${MSYSTEM:-}" ]]; then + return 0 + fi + local uname_s + uname_s="$(uname -s 2>/dev/null || true)" + case "$uname_s" in + MSYS*|MINGW*|CYGWIN*) return 0 ;; + esac + return 1 +} + # Some environments reap detached/background processes. Auto-foreground when detected. if [[ -n "${CODEX_CI:-}" && "$FOREGROUND" != "true" && "$FORCE_BACKGROUND" != "true" ]]; then FOREGROUND="true" @@ -66,19 +101,24 @@ fi # Windows/Git Bash reaps nohup background processes. Auto-foreground when detected. 
if [[ "$FOREGROUND" != "true" && "$FORCE_BACKGROUND" != "true" ]]; then - case "${OSTYPE:-}" in - msys*|cygwin*|mingw*) FOREGROUND="true" ;; - esac - if [[ -n "${MSYSTEM:-}" ]]; then + if is_windows_like_shell; then FOREGROUND="true" fi fi +# Session files (server.log, server-info, .last-token) embed the session key — +# keep everything this script and the server create owner-only. +umask 077 + # Generate unique session directory SESSION_ID="$$-$(date +%s)" if [[ -n "$PROJECT_DIR" ]]; then SESSION_DIR="${PROJECT_DIR}/.superpowers/brainstorm/${SESSION_ID}" + # Persist the bound port and key per project so a restart reuses them and an + # already-open browser tab reconnects to the same URL with a valid cookie. + export BRAINSTORM_PORT_FILE="${PROJECT_DIR}/.superpowers/brainstorm/.last-port" + export BRAINSTORM_TOKEN_FILE="${PROJECT_DIR}/.superpowers/brainstorm/.last-token" else SESSION_DIR="/tmp/brainstorm-${SESSION_ID}" fi @@ -86,10 +126,21 @@ fi STATE_DIR="${SESSION_DIR}/state" PID_FILE="${STATE_DIR}/server.pid" LOG_FILE="${STATE_DIR}/server.log" +SERVER_ID_FILE="${STATE_DIR}/server-instance-id" # Create fresh session directory with content and state peers mkdir -p "${SESSION_DIR}/content" "$STATE_DIR" +SERVER_ID="" +if [[ -r /dev/urandom ]]; then + SERVER_ID="$(od -An -N24 -tx1 /dev/urandom 2>/dev/null | tr -d ' \n' || true)" +fi +if ! [[ "$SERVER_ID" =~ ^[A-Za-z0-9_-]{32,64}$ ]]; then + SERVER_ID="$(printf '%08x%08x%08x%08x' "$$" "$(date +%s)" "${RANDOM:-0}" "${RANDOM:-0}")" +fi +printf '%s\n' "$SERVER_ID" > "$SERVER_ID_FILE" +chmod 600 "$SERVER_ID_FILE" 2>/dev/null || true + # Kill any existing server if [[ -f "$PID_FILE" ]]; then old_pid=$(cat "$PID_FILE") @@ -97,7 +148,7 @@ if [[ -f "$PID_FILE" ]]; then rm -f "$PID_FILE" fi -cd "$SCRIPT_DIR" +cd "$SCRIPT_DIR" || exit 1 # Resolve the harness PID (grandparent of this script). 
# $PPID is the ephemeral shell the harness spawned to run us — it dies @@ -107,22 +158,32 @@ if [[ -z "$OWNER_PID" || "$OWNER_PID" == "1" ]]; then OWNER_PID="$PPID" fi +# Windows/MSYS2: Node.js cannot see POSIX PIDs from the MSYS2 namespace. +# Passing a PID node cannot verify causes server to log owner-pid-invalid +# and self-terminate at the 60-second lifecycle check. Clear it so the +# watchdog is disabled and the idle timeout becomes the only shutdown trigger. +if is_windows_like_shell; then + OWNER_PID="" +fi + # Foreground mode for environments that reap detached/background processes. if [[ "$FOREGROUND" == "true" ]]; then - echo "$$" > "$PID_FILE" - env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs + env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs "--brainstorm-server-id=$SERVER_ID" & + SERVER_PID=$! + echo "$SERVER_PID" > "$PID_FILE" + wait "$SERVER_PID" exit $? fi # Start server, capturing output to log file # Use nohup to survive shell exit; disown to remove from job table -nohup env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs > "$LOG_FILE" 2>&1 & +nohup env BRAINSTORM_DIR="$SESSION_DIR" BRAINSTORM_HOST="$BIND_HOST" BRAINSTORM_URL_HOST="$URL_HOST" BRAINSTORM_OWNER_PID="$OWNER_PID" node server.cjs "--brainstorm-server-id=$SERVER_ID" > "$LOG_FILE" 2>&1 & SERVER_PID=$! 
disown "$SERVER_PID" 2>/dev/null echo "$SERVER_PID" > "$PID_FILE" # Wait for server-started message (check log file) -for i in {1..50}; do +for _ in {1..50}; do if grep -q "server-started" "$LOG_FILE" 2>/dev/null; then # Verify server is still alive after a short window (catches process reapers) alive="true" diff --git a/src-tauri/experts/skills/brainstorming/scripts/stop-server.sh b/src-tauri/experts/skills/brainstorming/scripts/stop-server.sh index a6b94e65..7cacfe94 100755 --- a/src-tauri/experts/skills/brainstorming/scripts/stop-server.sh +++ b/src-tauri/experts/skills/brainstorming/scripts/stop-server.sh @@ -15,15 +15,78 @@ fi STATE_DIR="${SESSION_DIR}/state" PID_FILE="${STATE_DIR}/server.pid" +SERVER_ID_FILE="${STATE_DIR}/server-instance-id" + +mark_stopped() { + local reason="$1" + rm -f "${STATE_DIR}/server-info" + printf '{"reason":"%s","timestamp":%s}\n' "$reason" "$(date +%s)" > "${STATE_DIR}/server-stopped" +} + +read_expected_server_id() { + [[ -f "$SERVER_ID_FILE" ]] || return 1 + local id + id="$(tr -d '\r\n' < "$SERVER_ID_FILE" 2>/dev/null || true)" + [[ "$id" =~ ^[A-Za-z0-9_-]{32,64}$ ]] || return 1 + printf '%s\n' "$id" +} + +command_line_for_pid() { + local pid="$1" + if [[ -r "/proc/$pid/cmdline" ]]; then + tr '\0' '\n' < "/proc/$pid/cmdline" 2>/dev/null || true + return 0 + fi + ps -ww -p "$pid" -o command= 2>/dev/null || ps -f -p "$pid" 2>/dev/null | sed '1d' || true +} + +command_has_server_id() { + local pid="$1" + local expected="$2" + local expected_arg="--brainstorm-server-id=$expected" + if [[ -r "/proc/$pid/cmdline" ]]; then + local arg + while IFS= read -r -d '' arg || [[ -n "$arg" ]]; do + [[ "$arg" == "$expected_arg" ]] && return 0 + done < "/proc/$pid/cmdline" + return 1 + fi + local command_line + command_line="$(command_line_for_pid "$pid")" + [[ -n "$command_line" ]] || return 1 + case " $command_line " in + *" $expected_arg "*) return 0 ;; + *) return 1 ;; + esac +} + +# Confirm a PID has this session's per-start instance id, 
not just a familiar +# process name. Ambiguous or legacy metadata fails closed as stale_pid. +is_brainstorm_server() { + kill -0 "$1" 2>/dev/null || return 1 + local expected_id + expected_id="$(read_expected_server_id)" || return 1 + command_has_server_id "$1" "$expected_id" || return 1 + return 0 +} if [[ -f "$PID_FILE" ]]; then pid=$(cat "$PID_FILE") + # Refuse to signal a PID we can't prove is our server. A stale pid file may + # point at an unrelated process after a reboot/PID wraparound. + if ! is_brainstorm_server "$pid"; then + rm -f "$PID_FILE" "$SERVER_ID_FILE" + mark_stopped "stale_pid" + echo '{"status": "stale_pid"}' + exit 0 + fi + # Try to stop gracefully, fallback to force if still alive kill "$pid" 2>/dev/null || true # Wait for graceful shutdown (up to ~2s) - for i in {1..20}; do + for _ in {1..20}; do if ! kill -0 "$pid" 2>/dev/null; then break fi @@ -43,7 +106,8 @@ if [[ -f "$PID_FILE" ]]; then exit 1 fi - rm -f "$PID_FILE" "${STATE_DIR}/server.log" + rm -f "$PID_FILE" "$SERVER_ID_FILE" "${STATE_DIR}/server.log" + mark_stopped "stop-server.sh" # Only delete ephemeral /tmp directories if [[ "$SESSION_DIR" == /tmp/* ]]; then diff --git a/src-tauri/experts/skills/brainstorming/spec-document-reviewer-prompt.md b/src-tauri/experts/skills/brainstorming/spec-document-reviewer-prompt.md index 35acbb61..60993129 100644 --- a/src-tauri/experts/skills/brainstorming/spec-document-reviewer-prompt.md +++ b/src-tauri/experts/skills/brainstorming/spec-document-reviewer-prompt.md @@ -7,7 +7,7 @@ Use this template when dispatching a spec document reviewer subagent. **Dispatch after:** Spec document is written to docs/superpowers/specs/ ``` -Task tool (general-purpose): +Subagent (general-purpose): description: "Review spec document" prompt: | You are a spec document reviewer. Verify this spec is complete and ready for planning. 
diff --git a/src-tauri/experts/skills/brainstorming/visual-companion.md b/src-tauri/experts/skills/brainstorming/visual-companion.md index 2113863d..906c9ac8 100644 --- a/src-tauri/experts/skills/brainstorming/visual-companion.md +++ b/src-tauri/experts/skills/brainstorming/visual-companion.md @@ -28,20 +28,30 @@ A question *about* a UI topic is not automatically a visual question. "What kind The server watches a directory for HTML files and serves the newest one to the browser. You write HTML content to `screen_dir`, the user sees it in their browser and can click to select options. Selections are recorded to `state_dir/events` that you read on your next turn. -**Content fragments vs full documents:** If your HTML file starts with `/.superpowers/brainstorm/` for the session directory. @@ -49,33 +59,34 @@ Save `screen_dir` and `state_dir` from the response. Tell user to open the URL. **Launching the server by platform:** -**Claude Code (macOS / Linux):** +**Claude Code:** ```bash -# Default mode works — the script backgrounds the server itself -scripts/start-server.sh --project-dir /path/to/project +# Default mode works — the script backgrounds the server itself. +scripts/start-server.sh --project-dir /path/to/project --open ``` -**Claude Code (Windows):** -```bash -# Windows auto-detects and uses foreground mode, which blocks the tool call. -# Use run_in_background: true on the Bash tool call so the server survives -# across conversation turns. -scripts/start-server.sh --project-dir /path/to/project -``` -When calling this via the Bash tool, set `run_in_background: true`. Then read `$STATE_DIR/server-info` on the next turn to get the URL and port. +On Windows, the script auto-detects and switches to foreground mode (which blocks the tool call). Use `run_in_background: true` on the Bash tool call so the server survives across conversation turns, then read `$STATE_DIR/server-info` on the next turn to get the URL and port. 
**Codex:** ```bash # Codex reaps background processes. The script auto-detects CODEX_CI and # switches to foreground mode. Run it normally — no extra flags needed. -scripts/start-server.sh --project-dir /path/to/project +scripts/start-server.sh --project-dir /path/to/project --open ``` **Gemini CLI:** ```bash # Use --foreground and set is_background: true on your shell tool call # so the process survives across turns -scripts/start-server.sh --project-dir /path/to/project --foreground +scripts/start-server.sh --project-dir /path/to/project --open --foreground +``` + +**Copilot CLI:** +```bash +# Use --foreground and start the server via the bash tool with mode: "async" +# so the process survives across turns. Capture the returned shellId for +# read_bash / stop_bash if you need to interact with it later. +scripts/start-server.sh --project-dir /path/to/project --open --foreground ``` **Other environments:** The server must keep running in the background across conversation turns. If your environment reaps detached processes, use `--foreground` and launch the command with your platform's background execution mechanism. @@ -94,10 +105,10 @@ Use `--url-host` to control what hostname is printed in the returned URL JSON. ## The Loop 1. **Check server is alive**, then **write HTML** to a new file in `screen_dir`: - - Before each write, check that `$STATE_DIR/server-info` exists. If it doesn't (or `$STATE_DIR/server-stopped` exists), the server has shut down — restart it with `start-server.sh` before continuing. The server auto-exits after 30 minutes of inactivity. + - **Required: confirm the server is alive before referring to the URL or pushing a screen.** Check that `$STATE_DIR/server-info` exists and `$STATE_DIR/server-stopped` does not. 
If it has shut down, restart it with `start-server.sh` using the **same `--project-dir`** — it reuses the same port, so the user's open tab reconnects on its own (it shows a "paused" overlay while the server is down) and you don't need to send a new URL. The server auto-exits after 4 hours idle (configurable with `--idle-timeout-minutes`). - Use semantic filenames: `platform.html`, `visual-style.html`, `layout.html` - **Never reuse filenames** — each screen gets a fresh file - - Use Write tool — **never use cat/heredoc** (dumps noise into terminal) + - Use your file-creation tool — **never use cat/heredoc** (dumps noise into terminal) - Server automatically serves the newest file 2. **Tell user what to expect and end your turn:** @@ -127,7 +138,7 @@ Use `--url-host` to control what hostname is printed in the returned URL JSON. ## Writing Content Fragments -Write just the content that goes inside the page. The server wraps it in the frame template automatically (header, theme CSS, selection indicator, and all interactive infrastructure). +Write just the content that goes inside the page. The server wraps it in the frame template automatically (header, theme CSS, connection status, and all interactive infrastructure). **Minimal example:** @@ -173,7 +184,7 @@ The frame template provides these CSS classes for your content:
``` -**Multi-select:** Add `data-multiselect` to the container to let users select multiple options. Each click toggles the item. The indicator bar shows the count. +**Multi-select:** Add `data-multiselect` to the container to let users select multiple options. Each click toggles the item's selected styling. ```html
diff --git a/src-tauri/experts/skills/dispatching-parallel-agents/SKILL.md b/src-tauri/experts/skills/dispatching-parallel-agents/SKILL.md index a6a3f5a0..75e7e22c 100644 --- a/src-tauri/experts/skills/dispatching-parallel-agents/SKILL.md +++ b/src-tauri/experts/skills/dispatching-parallel-agents/SKILL.md @@ -65,14 +65,17 @@ Each agent gets: ### 3. Dispatch in Parallel -```typescript -// In Claude Code / AI environment -Task("Fix agent-tool-abort.test.ts failures") -Task("Fix batch-completion-behavior.test.ts failures") -Task("Fix tool-approval-race-conditions.test.ts failures") -// All three run concurrently +Issue all three subagent dispatches in the same response — they run in parallel: + +```text +Subagent (general-purpose): "Fix agent-tool-abort.test.ts failures" +Subagent (general-purpose): "Fix batch-completion-behavior.test.ts failures" +Subagent (general-purpose): "Fix tool-approval-race-conditions.test.ts failures" +# All three run concurrently. ``` +Multiple dispatch calls in one response = parallel execution. One per response = sequential. + ### 4. Review and Integrate When agents return: diff --git a/src-tauri/experts/skills/executing-plans/SKILL.md b/src-tauri/experts/skills/executing-plans/SKILL.md index a5918627..78d88540 100644 --- a/src-tauri/experts/skills/executing-plans/SKILL.md +++ b/src-tauri/experts/skills/executing-plans/SKILL.md @@ -11,7 +11,7 @@ Load plan, review critically, execute all tasks, report when complete. **Announce at start:** "I'm using the executing-plans skill to implement this plan." -**Note:** Tell your human partner that Superpowers works much better with access to subagents. The quality of its work will be significantly higher if run on a platform with subagent support (such as Claude Code or Codex). If subagents are available, use superpowers:subagent-driven-development instead of this skill. +**Note:** Tell your human partner that Superpowers works much better with access to subagents. 
The quality of its work will be significantly higher if run on a platform with subagent support (Claude Code, Codex CLI, Codex App, Copilot CLI, and Gemini CLI all qualify; see the per-platform tool refs in `../using-superpowers/references/`). If subagents are available, use superpowers:subagent-driven-development instead of this skill. ## The Process @@ -19,7 +19,7 @@ Load plan, review critically, execute all tasks, report when complete. 1. Read plan file 2. Review critically - identify any questions or concerns about the plan 3. If concerns: Raise them with your human partner before starting -4. If no concerns: Create TodoWrite and proceed +4. If no concerns: Create todos for the plan items and proceed ### Step 2: Execute Tasks diff --git a/src-tauri/experts/skills/finishing-a-development-branch/SKILL.md b/src-tauri/experts/skills/finishing-a-development-branch/SKILL.md index 43da0ae1..7f5337aa 100644 --- a/src-tauri/experts/skills/finishing-a-development-branch/SKILL.md +++ b/src-tauri/experts/skills/finishing-a-development-branch/SKILL.md @@ -123,16 +123,6 @@ git branch -d ```bash # Push branch git push -u origin - -# Create PR -gh pr create --title "" --body "$(cat <<'EOF' -## Summary -<2-3 bullets of what changed> - -## Test Plan -- [ ] <verification steps> -EOF -)" ``` **Do NOT clean up worktree** — user needs it alive to iterate on PR feedback. @@ -180,7 +170,7 @@ WORKTREE_PATH=$(git rev-parse --show-toplevel) **If `GIT_DIR == GIT_COMMON`:** Normal repo, no worktree to clean up. Done. -**If worktree path is under `.worktrees/`, `worktrees/`, or `~/.config/superpowers/worktrees/`:** Superpowers created this worktree — we own cleanup. +**If worktree path is under `.worktrees/` or `worktrees/`:** Superpowers created this worktree — we own cleanup. ```bash MAIN_ROOT=$(git -C "$(git rev-parse --git-common-dir)/.." 
rev-parse --show-toplevel) @@ -224,7 +214,7 @@ git worktree prune # Self-healing: clean up any stale registrations **Cleaning up harness-owned worktrees** - **Problem:** Removing a worktree the harness created causes phantom state -- **Fix:** Only clean up worktrees under `.worktrees/`, `worktrees/`, or `~/.config/superpowers/worktrees/` +- **Fix:** Only clean up worktrees under `.worktrees/` or `worktrees/` **No confirmation for discard** - **Problem:** Accidentally delete work diff --git a/src-tauri/experts/skills/receiving-code-review/SKILL.md b/src-tauri/experts/skills/receiving-code-review/SKILL.md index 4ea72cdf..4c77a10e 100644 --- a/src-tauri/experts/skills/receiving-code-review/SKILL.md +++ b/src-tauri/experts/skills/receiving-code-review/SKILL.md @@ -27,7 +27,7 @@ WHEN receiving code review feedback: ## Forbidden Responses **NEVER:** -- "You're absolutely right!" (explicit CLAUDE.md violation) +- "You're absolutely right!" (explicit instruction-file violation) - "Great point!" / "Excellent feedback!" (performative) - "Let me implement that now" (before verification) @@ -126,7 +126,7 @@ Push back when: - Reference working tests/code - Involve your human partner if architectural -**Signal if uncomfortable pushing back out loud:** "Strange things are afoot at the Circle K" +**If you're uncomfortable pushing back out loud:** Name that tension, then tell your partner about the issue you've seen. They'll appreciate your honesty. ## Acknowledging Correct Feedback diff --git a/src-tauri/experts/skills/requesting-code-review/SKILL.md b/src-tauri/experts/skills/requesting-code-review/SKILL.md index 34b83404..4b8aa605 100644 --- a/src-tauri/experts/skills/requesting-code-review/SKILL.md +++ b/src-tauri/experts/skills/requesting-code-review/SKILL.md @@ -31,7 +31,7 @@ HEAD_SHA=$(git rev-parse HEAD) **2. 
Dispatch code reviewer subagent:** -Use Task tool with `general-purpose` type, fill template at `code-reviewer.md` +Dispatch a `general-purpose` subagent, filling the template at [code-reviewer.md](code-reviewer.md) **Placeholders:** - `{DESCRIPTION}` - Brief summary of what you built @@ -100,4 +100,4 @@ You: [Fix progress indicators] - Show code/tests that prove it works - Request clarification -See template at: requesting-code-review/code-reviewer.md +See template at: [code-reviewer.md](code-reviewer.md) diff --git a/src-tauri/experts/skills/requesting-code-review/code-reviewer.md b/src-tauri/experts/skills/requesting-code-review/code-reviewer.md index 525e4b47..db84ae2a 100644 --- a/src-tauri/experts/skills/requesting-code-review/code-reviewer.md +++ b/src-tauri/experts/skills/requesting-code-review/code-reviewer.md @@ -5,7 +5,7 @@ Use this template when dispatching a code reviewer subagent. **Purpose:** Review completed work against requirements and code quality standards before it cascades into more work. ``` -Task tool (general-purpose): +Subagent (general-purpose): description: "Review code changes" prompt: | You are a Senior Code Reviewer with expertise in software architecture, @@ -14,22 +14,26 @@ Task tool (general-purpose): ## What Was Implemented - {DESCRIPTION} + [DESCRIPTION] ## Requirements / Plan - {PLAN_OR_REQUIREMENTS} + [PLAN_OR_REQUIREMENTS] ## Git Range to Review - **Base:** {BASE_SHA} - **Head:** {HEAD_SHA} + **Base:** [BASE_SHA] + **Head:** [HEAD_SHA] ```bash - git diff --stat {BASE_SHA}..{HEAD_SHA} - git diff {BASE_SHA}..{HEAD_SHA} + git diff --stat [BASE_SHA]..[HEAD_SHA] + git diff [BASE_SHA]..[HEAD_SHA] ``` + ## Read-Only Review + + Your review is read-only on this checkout. Do not mutate the working tree, the index, HEAD, or branch state in any way. Use tools like `git show`, `git diff`, and `git log` to inspect history. If you need a working copy of a different revision, check it out into a separate temporary directory (e.g. 
`git worktree add /tmp/review-[SHA] [SHA]`) — never move HEAD on this checkout. + ## What to Check **Plan alignment:** @@ -122,10 +126,10 @@ Task tool (general-purpose): ``` **Placeholders:** -- `{DESCRIPTION}` — brief summary of what was built -- `{PLAN_OR_REQUIREMENTS}` — what it should do (plan file path, task text, or requirements) -- `{BASE_SHA}` — starting commit -- `{HEAD_SHA}` — ending commit +- `[DESCRIPTION]` — brief summary of what was built +- `[PLAN_OR_REQUIREMENTS]` — what it should do (plan file path, task text, or requirements) +- `[BASE_SHA]` — starting commit +- `[HEAD_SHA]` — ending commit **Reviewer returns:** Strengths, Issues (Critical / Important / Minor), Recommendations, Assessment diff --git a/src-tauri/experts/skills/subagent-driven-development/SKILL.md b/src-tauri/experts/skills/subagent-driven-development/SKILL.md index ea7ac8fd..26760430 100644 --- a/src-tauri/experts/skills/subagent-driven-development/SKILL.md +++ b/src-tauri/experts/skills/subagent-driven-development/SKILL.md @@ -5,11 +5,14 @@ description: Use when executing implementation plans with independent tasks in t # Subagent-Driven Development -Execute plan by dispatching fresh subagent per task, with two-stage review after each: spec compliance review first, then code quality review. +Execute plan by dispatching a fresh implementer subagent per task, a task review (spec compliance + code quality) after each, and a broad whole-branch review at the end. **Why subagents:** You delegate tasks to specialized agents with isolated context. By precisely crafting their instructions and context, you ensure they stay focused and succeed at their task. They should never inherit your session's context or history — you construct exactly what they need. This also preserves your own context for coordination work. 
-**Core principle:** Fresh subagent per task + two-stage review (spec then quality) = high quality, fast iteration +**Core principle:** Fresh subagent per task + task review (spec + quality) + broad final review = high quality, fast iteration + +**Narration:** between tool calls, narrate at most one short line — the +ledger and the tool results carry the record. **Continuous execution:** Do not pause to check in with your human partner between tasks. Execute all tasks from the plan without stopping. The only reasons to stop are: BLOCKED status you cannot resolve, ambiguity that genuinely prevents progress, or all tasks complete. "Should I continue?" prompts and progress summaries waste their time — they asked you to execute the plan, so execute it. @@ -36,7 +39,7 @@ digraph when_to_use { **vs. Executing Plans (parallel session):** - Same session (no context switch) - Fresh subagent per task (no context pollution) -- Two-stage review after each task: spec compliance first, then code quality +- Review after each task (spec compliance + code quality), broad review at the end - Faster iteration (no human-in-loop between tasks) ## The Process @@ -51,41 +54,48 @@ digraph process { "Implementer subagent asks questions?" [shape=diamond]; "Answer questions, provide context" [shape=box]; "Implementer subagent implements, tests, commits, self-reviews" [shape=box]; - "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [shape=box]; - "Spec reviewer subagent confirms code matches spec?" [shape=diamond]; - "Implementer subagent fixes spec gaps" [shape=box]; - "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [shape=box]; - "Code quality reviewer subagent approves?" [shape=diamond]; - "Implementer subagent fixes quality issues" [shape=box]; - "Mark task complete in TodoWrite" [shape=box]; + "Write diff file, dispatch task reviewer subagent (./task-reviewer-prompt.md)" [shape=box]; + "Task reviewer reports spec ✅ and quality approved?" 
[shape=diamond]; + "Dispatch fix subagent for Critical/Important findings" [shape=box]; + "Mark task complete in todo list and progress ledger" [shape=box]; } - "Read plan, extract all tasks with full text, note context, create TodoWrite" [shape=box]; + "Read plan, note context and global constraints, create todos" [shape=box]; "More tasks remain?" [shape=diamond]; - "Dispatch final code reviewer subagent for entire implementation" [shape=box]; + "Dispatch final code reviewer subagent (../requesting-code-review/code-reviewer.md)" [shape=box]; "Use superpowers:finishing-a-development-branch" [shape=box style=filled fillcolor=lightgreen]; - "Read plan, extract all tasks with full text, note context, create TodoWrite" -> "Dispatch implementer subagent (./implementer-prompt.md)"; + "Read plan, note context and global constraints, create todos" -> "Dispatch implementer subagent (./implementer-prompt.md)"; "Dispatch implementer subagent (./implementer-prompt.md)" -> "Implementer subagent asks questions?"; "Implementer subagent asks questions?" -> "Answer questions, provide context" [label="yes"]; "Answer questions, provide context" -> "Dispatch implementer subagent (./implementer-prompt.md)"; "Implementer subagent asks questions?" -> "Implementer subagent implements, tests, commits, self-reviews" [label="no"]; - "Implementer subagent implements, tests, commits, self-reviews" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)"; - "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" -> "Spec reviewer subagent confirms code matches spec?"; - "Spec reviewer subagent confirms code matches spec?" -> "Implementer subagent fixes spec gaps" [label="no"]; - "Implementer subagent fixes spec gaps" -> "Dispatch spec reviewer subagent (./spec-reviewer-prompt.md)" [label="re-review"]; - "Spec reviewer subagent confirms code matches spec?" 
-> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="yes"]; - "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" -> "Code quality reviewer subagent approves?"; - "Code quality reviewer subagent approves?" -> "Implementer subagent fixes quality issues" [label="no"]; - "Implementer subagent fixes quality issues" -> "Dispatch code quality reviewer subagent (./code-quality-reviewer-prompt.md)" [label="re-review"]; - "Code quality reviewer subagent approves?" -> "Mark task complete in TodoWrite" [label="yes"]; - "Mark task complete in TodoWrite" -> "More tasks remain?"; + "Implementer subagent implements, tests, commits, self-reviews" -> "Write diff file, dispatch task reviewer subagent (./task-reviewer-prompt.md)"; + "Write diff file, dispatch task reviewer subagent (./task-reviewer-prompt.md)" -> "Task reviewer reports spec ✅ and quality approved?"; + "Task reviewer reports spec ✅ and quality approved?" -> "Dispatch fix subagent for Critical/Important findings" [label="no"]; + "Dispatch fix subagent for Critical/Important findings" -> "Write diff file, dispatch task reviewer subagent (./task-reviewer-prompt.md)" [label="re-review"]; + "Task reviewer reports spec ✅ and quality approved?" -> "Mark task complete in todo list and progress ledger" [label="yes"]; + "Mark task complete in todo list and progress ledger" -> "More tasks remain?"; "More tasks remain?" -> "Dispatch implementer subagent (./implementer-prompt.md)" [label="yes"]; - "More tasks remain?" -> "Dispatch final code reviewer subagent for entire implementation" [label="no"]; - "Dispatch final code reviewer subagent for entire implementation" -> "Use superpowers:finishing-a-development-branch"; + "More tasks remain?" 
-> "Dispatch final code reviewer subagent (../requesting-code-review/code-reviewer.md)" [label="no"]; + "Dispatch final code reviewer subagent (../requesting-code-review/code-reviewer.md)" -> "Use superpowers:finishing-a-development-branch"; } ``` +## Pre-Flight Plan Review + +Before dispatching Task 1, scan the plan once for conflicts: + +- tasks that contradict each other or the plan's Global Constraints +- anything the plan explicitly mandates that the review rubric treats as a + defect (a test that asserts nothing, verbatim duplication of a logic block) + +Present everything you find to your human partner as one batched question — +each finding beside the plan text that mandates it, asking which governs — +before execution begins, not one interrupt per discovery mid-plan. If the +scan is clean, proceed without comment. The review loop remains the net for +conflicts that only emerge from implementation. + ## Model Selection Use the least powerful model that can handle each role to conserve cost and increase speed. @@ -94,9 +104,27 @@ Use the least powerful model that can handle each role to conserve cost and incr **Integration and judgment tasks** (multi-file coordination, pattern matching, debugging): use a standard model. -**Architecture, design, and review tasks**: use the most capable available model. +**Architecture and design tasks**: use the most capable available model. +The final whole-branch review is one of these — dispatch it on the most +capable available model, not the session default. + +**Review tasks**: choose the model with the same judgment, scaled to the +diff's size, complexity, and risk. A small mechanical diff does not need the +most capable model; a subtle concurrency change does. + +**Always specify the model explicitly when dispatching a subagent.** An +omitted model inherits your session's model — often the most capable and +most expensive — which silently defeats this section. 
-**Task complexity signals:** +**Turn count beats token price.** Wall-clock and context cost scale with how +many turns a subagent takes, and the cheapest models routinely take 2-3× the +turns on multi-step work — costing more overall. Use a mid-tier model as the +floor for reviewers and for implementers working from prose descriptions. +When the task's plan text contains the complete code to write, the +implementation is transcription plus testing: use the cheapest tier for +that implementer. Single-file mechanical fixes also take the cheapest tier. + +**Task complexity signals (implementation tasks):** - Touches 1-2 files with a complete spec → cheap model - Touches multiple files with integration concerns → standard model - Requires design judgment or broad codebase understanding → most capable model @@ -105,7 +133,7 @@ Use the least powerful model that can handle each role to conserve cost and incr Implementer subagents report one of four statuses. Handle each appropriately: -**DONE:** Proceed to spec compliance review. +**DONE:** Generate the review package (`scripts/review-package BASE HEAD`, from this skill's directory — it prints the unique file path it wrote; BASE is the commit you recorded before dispatching the implementer — never `HEAD~1`, which silently drops all but the last commit of a multi-commit task), then dispatch the task reviewer with the printed path. **DONE_WITH_CONCERNS:** The implementer completed the work but flagged doubts. Read the concerns before proceeding. If the concerns are about correctness or scope, address them before review. If they're observations (e.g., "this file is getting large"), note them and proceed to review. @@ -119,11 +147,125 @@ Implementer subagents report one of four statuses. Handle each appropriately: **Never** ignore an escalation or force the same model to retry without changes. If the implementer said it's stuck, something needs to change. 
+## Handling Reviewer ⚠️ Items + +The task reviewer may report "⚠️ Cannot verify from diff" items — requirements +that live in unchanged code or span tasks. These do not block the rest of the +review, but you must resolve each one yourself before marking the task +complete: you hold the plan and cross-task context the reviewer +lacks. If you confirm an item is a real gap, treat it as a failed spec +review — send it back to the implementer and re-review. + +## Constructing Reviewer Prompts + +Per-task reviews are task-scoped gates. The broad review happens once, at the +final whole-branch review. When you fill a reviewer template: + +- Do not add open-ended directives like "check all uses" or "run race tests + if useful" without a concrete, task-specific reason +- Do not ask a reviewer to re-run tests the implementer already ran on the + same code — the implementer's report carries the test evidence +- Do not pre-judge findings for the reviewer — never instruct a reviewer to + ignore or not flag a specific issue. If you believe a finding would be a + false positive, let the reviewer raise it and adjudicate it in the review + loop. If the prompt you are writing contains "do not flag," "don't treat X + as a defect," "at most Minor," or "the plan chose" — stop: you are + pre-judging, usually to spare yourself a review loop. +- The global-constraints block you hand the reviewer is its attention + lens. Copy the binding requirements verbatim from the plan's Global + Constraints section or the spec: exact values, exact formats, and the + stated relationships between components ("same layout as X", "matches + Y"). The reviewer's template already carries the process rules (YAGNI, + test hygiene, review method) — the constraints block is for what THIS + project's spec demands. 
+- Hand the reviewer its diff as a file: run this skill's + `scripts/review-package BASE HEAD` and pass the reviewer the file path + it prints (or, without bash: `git log --oneline`, `git diff --stat`, + and `git diff -U10` for the range, redirected to one uniquely named + file). The output never enters your own context, and the reviewer sees + the commit list, stat summary, and full diff with context in one Read + call. Use the BASE you recorded before dispatching the implementer — + never `HEAD~1`, which silently truncates multi-commit tasks. +- A dispatch prompt describes one task, not the session's history. Do not + paste accumulated prior-task summaries ("state after Tasks 1-3") into + later dispatches — a real session's dispatch hit 42k chars of which 99% + was pasted history. A fresh subagent needs its task, the interfaces it + touches, and the global constraints. Nothing else. +- Dispatch fix subagents for Critical and Important findings. Record Minor + findings in the progress ledger as you go, and point the final + whole-branch review at that list so it can triage which must be fixed + before merge. A roll-up nobody reads is a silent discard. +- A finding labeled plan-mandated — or any finding that conflicts with + what the plan's text requires — is the human's decision, like any plan + contradiction: present the finding and the plan text, ask which governs. + Do not dismiss the finding because the plan mandates it, and do not + dispatch a fix that contradicts the plan without asking. +- The final whole-branch review gets a package too: run + `scripts/review-package MERGE_BASE HEAD` (MERGE_BASE = the commit the + branch started from, e.g. `git merge-base main HEAD`) and include the + printed path in the final review dispatch, so the final reviewer reads + one file instead of re-deriving the branch diff with git commands. 
+- Every fix dispatch carries the implementer contract: the fix subagent + re-runs the tests covering its change and reports the results. Name the + covering test files in the dispatch — a one-line fix does not need the + whole suite. Before re-dispatching the reviewer, confirm the fix report + contains the covering tests, the command run, and the output; dispatch + the re-review once all three are present. +- If the final whole-branch review returns findings, dispatch ONE fix + subagent with the complete findings list — not one fixer per finding. + Per-finding fixers each rebuild context and re-run suites; a real + session's final-review fix wave cost more than all its tasks combined. + +## File Handoffs + +Everything you paste into a dispatch prompt — and everything a subagent +prints back — stays resident in your context for the rest of the session +and is re-read on every later turn. Hand artifacts over as files: + +- **Task brief:** before dispatching an implementer, run this skill's + `scripts/task-brief PLAN_FILE N` — it extracts the task's full text to a + uniquely named file and prints the path. Compose the dispatch so the + brief stays the single source of requirements. Your dispatch should + contain: (1) one line on where this task fits in the project; (2) the + brief path, introduced as "read this first — it is your requirements, + with the exact values to use verbatim"; (3) interfaces and decisions + from earlier tasks that the brief cannot know; (4) your resolution of + any ambiguity you noticed in the brief; (5) the report-file path and + report contract. Exact values (numbers, magic strings, signatures, test + cases) appear only in the brief. +- **Report file:** name the implementer's report file after the brief + (brief `…/task-N-brief.md` → report `…/task-N-report.md`) and put it in + the dispatch prompt. The implementer writes the full report there and + returns only status, commits, a one-line test summary, and concerns. 
+- **Reviewer inputs:** the task reviewer gets three paths — the same brief + file, the report file, and the review package — plus the global + constraints that bind the task. +- Fix dispatches append their fix report (with test results) to the same + report file and return a short summary; re-reviews read the updated file. + +## Durable Progress + +Conversation memory does not survive compaction. In real sessions, +controllers that lost their place have re-dispatched entire completed task +sequences — the single most expensive failure observed. Track progress in +a ledger file, not only in todos. + +- At skill start, check for a ledger: + `cat "$(git rev-parse --git-path sdd)/progress.md"`. Tasks listed there + as complete are DONE — do not re-dispatch them; resume at the first task + not marked complete. +- When a task's review comes back clean, append one line to the ledger in + the same message as your other bookkeeping: + `Task N: complete (commits <base7>..<head7>, review clean)`. +- The ledger is your recovery map: the commits it names exist in git even + when your context no longer remembers creating them. After compaction, + trust the ledger and `git log` over your own recollection. + ## Prompt Templates -- `./implementer-prompt.md` - Dispatch implementer subagent -- `./spec-reviewer-prompt.md` - Dispatch spec compliance reviewer subagent -- `./code-quality-reviewer-prompt.md` - Dispatch code quality reviewer subagent +- [implementer-prompt.md](implementer-prompt.md) - Dispatch implementer subagent +- [task-reviewer-prompt.md](task-reviewer-prompt.md) - Dispatch task reviewer subagent (spec compliance + code quality) +- Final whole-branch review: use superpowers:requesting-code-review's [code-reviewer.md](../requesting-code-review/code-reviewer.md) ## Example Workflow @@ -131,13 +273,11 @@ Implementer subagents report one of four statuses. Handle each appropriately: You: I'm using Subagent-Driven Development to execute this plan. 
[Read plan file once: docs/superpowers/plans/feature-plan.md] -[Extract all 5 tasks with full text and context] -[Create TodoWrite with all tasks] +[Create todos for all tasks] Task 1: Hook installation script -[Get Task 1 text and context (already extracted)] -[Dispatch implementation subagent with full task text + context] +[Run task-brief for Task 1; dispatch implementer with brief + report paths + context] Implementer: "Before I begin - should the hook be installed at user or system level?" @@ -150,18 +290,15 @@ Implementer: "Got it. Implementing now..." - Self-review: Found I missed --force flag, added it - Committed -[Dispatch spec compliance reviewer] -Spec reviewer: ✅ Spec compliant - all requirements met, nothing extra - -[Get git SHAs, dispatch code quality reviewer] -Code reviewer: Strengths: Good test coverage, clean. Issues: None. Approved. +[Run review-package, dispatch task reviewer with the printed path] +Task reviewer: Spec ✅ - all requirements met, nothing extra. + Strengths: Good test coverage, clean. Issues: None. Task quality: Approved. [Mark Task 1 complete] Task 2: Recovery modes -[Get Task 2 text and context (already extracted)] -[Dispatch implementation subagent with full task text + context] +[Run task-brief for Task 2; dispatch implementer with brief + report paths + context] Implementer: [No questions, proceeds] Implementer: @@ -170,25 +307,17 @@ Implementer: - Self-review: All good - Committed -[Dispatch spec compliance reviewer] -Spec reviewer: ❌ Issues: +[Run review-package, dispatch task reviewer with the printed path] +Task reviewer: Spec ❌: - Missing: Progress reporting (spec says "report every 100 items") - Extra: Added --json flag (not requested) + Issues (Important): Magic number (100) -[Implementer fixes issues] -Implementer: Removed --json flag, added progress reporting - -[Spec reviewer reviews again] -Spec reviewer: ✅ Spec compliant now - -[Dispatch code quality reviewer] -Code reviewer: Strengths: Solid. 
Issues (Important): Magic number (100) - -[Implementer fixes] -Implementer: Extracted PROGRESS_INTERVAL constant +[Dispatch fix subagent with all findings] +Fixer: Removed --json flag, added progress reporting, extracted PROGRESS_INTERVAL constant -[Code reviewer reviews again] -Code reviewer: ✅ Approved +[Task reviewer reviews again] +Task reviewer: Spec ✅. Task quality: Approved. [Mark Task 2 complete] @@ -215,20 +344,20 @@ Done! - Review checkpoints automatic **Efficiency gains:** -- No file reading overhead (controller provides full text) -- Controller curates exactly what context is needed +- Controller curates exactly what context is needed; bulk artifacts move + as files, not pasted text - Subagent gets complete information upfront - Questions surfaced before work begins (not after) **Quality gates:** - Self-review catches issues before handoff -- Two-stage review: spec compliance, then code quality +- Task review carries two verdicts: spec compliance and code quality - Review loops ensure fixes actually work - Spec compliance prevents over/under-building - Code quality ensures implementation is well-built **Cost:** -- More subagent invocations (implementer + 2 reviewers per task) +- More subagent invocations (implementer + reviewer per task) - Controller does more prep work (extracting all tasks upfront) - Review loops add iterations - But catches issues early (cheaper than debugging later) @@ -237,17 +366,25 @@ Done! 
**Never:** - Start implementation on main/master branch without explicit user consent -- Skip reviews (spec compliance OR code quality) +- Skip task review, or accept a report missing either verdict (spec compliance AND task quality are both required) - Proceed with unfixed issues - Dispatch multiple implementation subagents in parallel (conflicts) -- Make subagent read plan file (provide full text instead) +- Make a subagent read the whole plan file (hand it its task brief — + `scripts/task-brief` — instead) - Skip scene-setting context (subagent needs to understand where task fits) - Ignore subagent questions (answer before letting them proceed) -- Accept "close enough" on spec compliance (spec reviewer found issues = not done) +- Accept "close enough" on spec compliance (reviewer found spec issues = not done) - Skip review loops (reviewer found issues = implementer fixes = review again) - Let implementer self-review replace actual review (both are needed) -- **Start code quality review before spec compliance is ✅** (wrong order) -- Move to next task while either review has open issues +- Tell a reviewer what not to flag, or pre-rate a finding's severity in the + dispatch prompt ("treat it as Minor at most") — the plan's example code is + a starting point, not evidence that its weaknesses were chosen +- Dispatch a task reviewer without a diff file — generate it first + (`scripts/review-package BASE HEAD`) and name the printed path in the + prompt +- Move to next task while the review has open Critical/Important issues +- Re-dispatch a task the progress ledger already marks complete — check + the ledger (and `git log`) after any compaction or resume **If subagent asks questions:** - Answer clearly and completely @@ -269,7 +406,7 @@ Done! 
**Required workflow skills:** - **superpowers:using-git-worktrees** - Ensures isolated workspace (creates one or verifies existing) - **superpowers:writing-plans** - Creates the plan this skill executes -- **superpowers:requesting-code-review** - Code review template for reviewer subagents +- **superpowers:requesting-code-review** - Code review template for the final whole-branch review - **superpowers:finishing-a-development-branch** - Complete development after all tasks **Subagents should use:** diff --git a/src-tauri/experts/skills/subagent-driven-development/code-quality-reviewer-prompt.md b/src-tauri/experts/skills/subagent-driven-development/code-quality-reviewer-prompt.md deleted file mode 100644 index 51f901a5..00000000 --- a/src-tauri/experts/skills/subagent-driven-development/code-quality-reviewer-prompt.md +++ /dev/null @@ -1,25 +0,0 @@ -# Code Quality Reviewer Prompt Template - -Use this template when dispatching a code quality reviewer subagent. - -**Purpose:** Verify implementation is well-built (clean, tested, maintainable) - -**Only dispatch after spec compliance review passes.** - -``` -Task tool (general-purpose): - Use template at requesting-code-review/code-reviewer.md - - DESCRIPTION: [task summary, from implementer's report] - PLAN_OR_REQUIREMENTS: Task N from [plan-file] - BASE_SHA: [commit before task] - HEAD_SHA: [current commit] -``` - -**In addition to standard code quality concerns, the reviewer should check:** -- Does each file have one clear responsibility with a well-defined interface? -- Are units decomposed so they can be understood and tested independently? -- Is the implementation following the file structure from the plan? -- Did this implementation create new files that are already large, or significantly grow existing files? (Don't flag pre-existing file sizes — focus on what this change contributed.) 
- -**Code reviewer returns:** Strengths, Issues (Critical/Important/Minor), Assessment diff --git a/src-tauri/experts/skills/subagent-driven-development/implementer-prompt.md b/src-tauri/experts/skills/subagent-driven-development/implementer-prompt.md index 400c1034..218fcfeb 100644 --- a/src-tauri/experts/skills/subagent-driven-development/implementer-prompt.md +++ b/src-tauri/experts/skills/subagent-driven-development/implementer-prompt.md @@ -3,14 +3,17 @@ Use this template when dispatching an implementer subagent. ``` -Task tool (general-purpose): +Subagent (general-purpose): description: "Implement Task N: [task name]" + model: [MODEL — REQUIRED: choose per SKILL.md Model Selection; an omitted + model silently inherits the session's most expensive one] prompt: | You are implementing Task N: [task name] ## Task Description - [FULL TEXT of task from plan - paste it here, don't make subagent read file] + Read your task brief first: [BRIEF_FILE] + It contains the full task text from the plan. ## Context @@ -41,6 +44,9 @@ Task tool (general-purpose): **While you work:** If you encounter something unexpected or unclear, **ask questions**. It's always OK to pause and clarify. Don't guess or make assumptions. + While iterating, run the focused test for what you're changing; run the + full suite once before committing, not after every edit. + ## Code Organization You reason best about code you can hold in context at once, and your edits are more @@ -94,19 +100,39 @@ Task tool (general-purpose): - Do tests actually verify behavior (not just mock behavior)? - Did I follow TDD if required? - Are tests comprehensive? + - Is the test output pristine (no stray warnings or noise)? If you find issues during self-review, fix them now before reporting. + ## After Review Findings + + If a reviewer finds issues and you fix them, re-run the tests that cover + the amended code and append the results to your report file. 
Reviewers + will not re-run tests for you — your report is the test evidence. + ## Report Format - When done, report: - - **Status:** DONE | DONE_WITH_CONCERNS | BLOCKED | NEEDS_CONTEXT + Write your full report to [REPORT_FILE]: - What you implemented (or what you attempted, if blocked) - What you tested and test results + - **TDD Evidence** (if TDD was required for this task): + - RED: command run, relevant failing output before implementation, and why the failure was expected + - GREEN: command run and relevant passing output after implementation - Files changed - Self-review findings (if any) - Any issues or concerns + Then report back with ONLY (under 15 lines — the detail lives in the + report file): + - **Status:** DONE | DONE_WITH_CONCERNS | BLOCKED | NEEDS_CONTEXT + - Commits created (short SHA + subject) + - One-line test summary (e.g. "14/14 passing, output pristine") + - Your concerns, if any + - The report file path + + If BLOCKED or NEEDS_CONTEXT, put the specifics in the final message + itself — the controller acts on it directly. + Use DONE_WITH_CONCERNS if you completed the work but have doubts about correctness. Use BLOCKED if you cannot complete the task. Use NEEDS_CONTEXT if you need information that wasn't provided. Never silently produce work you're unsure about. diff --git a/src-tauri/experts/skills/subagent-driven-development/scripts/review-package b/src-tauri/experts/skills/subagent-driven-development/scripts/review-package new file mode 100755 index 00000000..88a00224 --- /dev/null +++ b/src-tauri/experts/skills/subagent-driven-development/scripts/review-package @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Generate a review package: commit list, stat summary, and the net +# diff with extended context, written to a file the reviewer reads in one +# call. Using the recorded per-task BASE (not HEAD~1) keeps multi-commit +# tasks intact. 
+# +# Usage: review-package BASE HEAD [OUTFILE] +# Default OUTFILE: <git-dir>/sdd/review-<base7>..<head7>.diff — unique per +# repo instance and per range, so concurrent sessions cannot collide and a +# re-review after fixes always gets a distinctly named fresh file. +set -euo pipefail + +if [ $# -lt 2 ] || [ $# -gt 3 ]; then + echo "usage: review-package BASE HEAD [OUTFILE]" >&2 + exit 2 +fi + +base=$1 +head=$2 + +git rev-parse --verify --quiet "$base" >/dev/null || { echo "bad BASE: $base" >&2; exit 2; } +git rev-parse --verify --quiet "$head" >/dev/null || { echo "bad HEAD: $head" >&2; exit 2; } + +if [ $# -eq 3 ]; then + out=$3 +else + dir=$(git rev-parse --git-path sdd) + mkdir -p "$dir" + dir=$(cd "$dir" && pwd) + out="$dir/review-$(git rev-parse --short "$base")..$(git rev-parse --short "$head").diff" +fi + +{ + echo "# Review package: ${base}..${head}" + echo + echo "## Commits" + git log --oneline "${base}..${head}" + echo + echo "## Files changed" + git diff --stat "${base}..${head}" + echo + echo "## Diff" + git diff -U10 "${base}..${head}" +} > "$out" + +commits=$(git rev-list --count "${base}..${head}") +echo "wrote ${out}: ${commits} commit(s), $(wc -c < "$out" | tr -d ' ') bytes" diff --git a/src-tauri/experts/skills/subagent-driven-development/scripts/task-brief b/src-tauri/experts/skills/subagent-driven-development/scripts/task-brief new file mode 100755 index 00000000..b046a2bb --- /dev/null +++ b/src-tauri/experts/skills/subagent-driven-development/scripts/task-brief @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Extract one task's full text from an implementation plan into a file the +# implementer reads in one call, so the task text never has to be pasted +# through the controller's context. +# +# Usage: task-brief PLAN_FILE TASK_NUMBER [OUTFILE] +# Default OUTFILE: <git-dir>/sdd/task-<N>-brief.md — unique per repo +# instance, so concurrent sessions cannot collide. 
+set -euo pipefail + +if [ $# -lt 2 ] || [ $# -gt 3 ]; then + echo "usage: task-brief PLAN_FILE TASK_NUMBER [OUTFILE]" >&2 + exit 2 +fi + +plan=$1 +n=$2 +[ -f "$plan" ] || { echo "no such plan file: $plan" >&2; exit 2; } + +if [ $# -eq 3 ]; then + out=$3 +else + dir=$(git rev-parse --git-path sdd) + mkdir -p "$dir" + dir=$(cd "$dir" && pwd) + out="$dir/task-${n}-brief.md" +fi + +awk -v n="$n" ' + /^```/ { infence = !infence } + !infence && /^#+[ \t]+Task[ \t]+[0-9]+/ { + intask = ($0 ~ ("^#+[ \t]+Task[ \t]+" n "([^0-9]|$)")) + } + intask { print } +' "$plan" > "$out" + +if [ ! -s "$out" ]; then + echo "task ${n} not found in ${plan} (no heading matching 'Task ${n}')" >&2 + exit 3 +fi + +echo "wrote ${out}: $(wc -l < "$out" | tr -d ' ') lines" diff --git a/src-tauri/experts/skills/subagent-driven-development/spec-reviewer-prompt.md b/src-tauri/experts/skills/subagent-driven-development/spec-reviewer-prompt.md deleted file mode 100644 index ab5ddb8a..00000000 --- a/src-tauri/experts/skills/subagent-driven-development/spec-reviewer-prompt.md +++ /dev/null @@ -1,61 +0,0 @@ -# Spec Compliance Reviewer Prompt Template - -Use this template when dispatching a spec compliance reviewer subagent. - -**Purpose:** Verify implementer built what was requested (nothing more, nothing less) - -``` -Task tool (general-purpose): - description: "Review spec compliance for Task N" - prompt: | - You are reviewing whether an implementation matches its specification. - - ## What Was Requested - - [FULL TEXT of task requirements] - - ## What Implementer Claims They Built - - [From implementer's report] - - ## CRITICAL: Do Not Trust the Report - - The implementer finished suspiciously quickly. Their report may be incomplete, - inaccurate, or optimistic. You MUST verify everything independently. 
- - **DO NOT:** - - Take their word for what they implemented - - Trust their claims about completeness - - Accept their interpretation of requirements - - **DO:** - - Read the actual code they wrote - - Compare actual implementation to requirements line by line - - Check for missing pieces they claimed to implement - - Look for extra features they didn't mention - - ## Your Job - - Read the implementation code and verify: - - **Missing requirements:** - - Did they implement everything that was requested? - - Are there requirements they skipped or missed? - - Did they claim something works but didn't actually implement it? - - **Extra/unneeded work:** - - Did they build things that weren't requested? - - Did they over-engineer or add unnecessary features? - - Did they add "nice to haves" that weren't in spec? - - **Misunderstandings:** - - Did they interpret requirements differently than intended? - - Did they solve the wrong problem? - - Did they implement the right feature but wrong way? - - **Verify by reading code, not by trusting report.** - - Report: - - ✅ Spec compliant (if everything matches after code inspection) - - ❌ Issues found: [list specifically what's missing or extra, with file:line references] -``` diff --git a/src-tauri/experts/skills/subagent-driven-development/task-reviewer-prompt.md b/src-tauri/experts/skills/subagent-driven-development/task-reviewer-prompt.md new file mode 100644 index 00000000..588a4022 --- /dev/null +++ b/src-tauri/experts/skills/subagent-driven-development/task-reviewer-prompt.md @@ -0,0 +1,188 @@ +# Task Reviewer Prompt Template + +Use this template when dispatching a task reviewer subagent. The reviewer +reads the task's diff once and returns two verdicts: spec compliance and +code quality. 
+ +**Purpose:** Verify one task's implementation matches its requirements (nothing +more, nothing less) and is well-built (clean, tested, maintainable) + +``` +Subagent (general-purpose): + description: "Review Task N (spec + quality)" + model: [MODEL — REQUIRED: choose per SKILL.md Model Selection; an omitted + model silently inherits the session's most expensive one] + prompt: | + You are reviewing one task's implementation: first whether it matches its + requirements, then whether it is well-built. This is a task-scoped gate, + not a merge review — a broad whole-branch review happens separately after + all tasks are complete. + + ## What Was Requested + + Read the task brief: [BRIEF_FILE] + + Global constraints from the spec/design that bind this task: + [GLOBAL_CONSTRAINTS] + + ## What the Implementer Claims They Built + + Read the implementer's report: [REPORT_FILE] + + ## Diff Under Review + + **Base:** [BASE_SHA] + **Head:** [HEAD_SHA] + **Diff file:** [DIFF_FILE] + + Read the diff file once — it contains the commit list, a stat summary, + and the full diff with surrounding context, and it is your view of the + change. The diff's context lines ARE the changed files: do not Read a + changed file separately unless a hunk you must judge is cut off + mid-function — and say so in your report. Do not re-run git commands. + If the diff file is missing, fetch the diff yourself: + `git diff --stat [BASE_SHA]..[HEAD_SHA]` and `git diff [BASE_SHA]..[HEAD_SHA]`. + Do not crawl the broader codebase. Inspect code outside the diff only + to evaluate a concrete risk you can name — one focused check per named + risk, and name both the risk and what you checked in your report. + Cross-cutting changes are legitimate named risks: if the diff changes + lock ordering, a function or API contract, or shared mutable state, + checking the call sites is the right method. + + Your review is read-only on this checkout. 
Do not mutate the working + tree, the index, HEAD, or branch state in any way. + + ## Do Not Trust the Report + + Treat the implementer's report as unverified claims about the code. It + may be incomplete, inaccurate, or optimistic. Verify the claims against + the diff. Design rationales in the report are claims too: "left it per + YAGNI," "kept it simple deliberately," or any other justification is the + implementer grading their own work. Judge the code on its merits — a + stated rationale never downgrades a finding's severity. + + ## Tests + + The implementer already ran the tests and reported results with TDD + evidence for exactly this code. Do not re-run the suite to confirm their + report. Run a test only when reading the code raises a specific doubt + that no existing run answers — and then a focused test, never a + package-wide suite, race detector run, or repeated/high-count loop. If + heavy validation seems warranted, recommend it in your report instead of + running it. If you cannot run commands in this environment, name the + test you would run. + + Warnings or other noise in the implementer's reported test output are + findings — test output should be pristine. + + ## Part 1: Spec Compliance + + Compare the diff against What Was Requested: + + - **Missing:** requirements they skipped, missed, or claimed without + implementing + - **Extra:** features that weren't requested, over-engineering, unneeded + "nice to haves" + - **Misunderstood:** right feature built the wrong way, wrong problem + solved + + If a requirement cannot be verified from this diff alone (it lives in + unchanged code or spans tasks), report it as a ⚠️ item instead of + broadening your search. + + ## Part 2: Code Quality + + **Code quality:** + - Clean separation of concerns? + - Proper error handling? + - DRY without premature abstraction? + - Edge cases handled? + + **Tests:** + - Do the new and changed tests verify real behavior, not mocks? + - Are the task's edge cases covered? 
+ + **Structure:** + - Does each file have one clear responsibility with a well-defined interface? + - Are units decomposed so they can be understood and tested independently? + - Is the implementation following the file structure from the plan? + - Did this change create new files that are already large, or + significantly grow existing files? (Don't flag pre-existing file + sizes — focus on what this change contributed.) + + Your report should point at evidence: file:line references for every + finding and for any check you would otherwise answer with a bare + "yes." A tight report that cites lines gives the controller everything + it needs. + + Your final message is the report itself: begin directly with the + spec-compliance verdict. Every line is a verdict, a finding with + file:line, or a check you ran — no preamble, no process narration, + no closing summary. + + ## Calibration + + Categorize issues by actual severity. Not everything is Critical. + Important means this task cannot be trusted until it is fixed: incorrect + or fragile behavior, a missed requirement, or maintainability damage you + would block a merge over — verbatim duplication of a logic block, + swallowed errors, tests that assert nothing. "Coverage could be broader" + and polish suggestions are Minor. + If the plan or brief explicitly mandates something this rubric calls a + defect (a test that asserts nothing, verbatim duplication of a logic + block), that IS a finding — report it as Important, labeled + plan-mandated. The plan's authorship does not grade its own work; the + human decides. + Acknowledge what was done well before listing issues — accurate praise + helps the implementer trust the rest of the feedback. 
+ + ## Output Format + + ### Spec Compliance + + - ✅ Spec compliant | ❌ Issues found: [what's missing/extra/misunderstood, + with file:line references] + - ⚠️ Cannot verify from diff: [requirements you could not verify from the + diff alone, and what the controller should check — report alongside the + ✅/❌ verdict for everything you could verify] + + ### Strengths + [What's well done? Be specific.] + + ### Issues + + #### Critical (Must Fix) + #### Important (Should Fix) + #### Minor (Nice to Have) + + For each issue: file:line, what's wrong, why it matters, how to fix + (if not obvious). + + ### Assessment + + **Task quality:** [Approved | Needs fixes] + + **Reasoning:** [1-2 sentence technical assessment] +``` + +**Placeholders:** +- `[MODEL]` — REQUIRED: reviewer model per SKILL.md Model Selection +- `[BRIEF_FILE]` — REQUIRED: the task brief file (`scripts/task-brief PLAN N` + prints the path; same file the implementer worked from) +- `[GLOBAL_CONSTRAINTS]` — the binding requirements copied verbatim from + the plan's Global Constraints section or the spec: exact values, formats, + and stated relationships between components (not process rules — those + are already in this template) +- `[REPORT_FILE]` — REQUIRED: the file the implementer wrote its detailed + report to +- `[BASE_SHA]` — commit before this task +- `[HEAD_SHA]` — current commit +- `[DIFF_FILE]` — REQUIRED: the path the controller wrote the review + package to (`scripts/review-package BASE HEAD` prints the unique path it + wrote; the package never enters the controller's context) + +**Reviewer returns:** Spec Compliance verdict (✅/❌/⚠️), Strengths, Issues +(Critical/Important/Minor), Task quality verdict + +A fix dispatch can address spec gaps and quality findings together; +re-review after fixes covers both verdicts. 
diff --git a/src-tauri/experts/skills/systematic-debugging/SKILL.md b/src-tauri/experts/skills/systematic-debugging/SKILL.md index 111d2a98..b0eca38b 100644 --- a/src-tauri/experts/skills/systematic-debugging/SKILL.md +++ b/src-tauri/experts/skills/systematic-debugging/SKILL.md @@ -237,7 +237,7 @@ If you catch yourself thinking: - "Is that not happening?" - You assumed without verifying - "Will it show us...?" - You should have added evidence gathering - "Stop guessing" - You're proposing fixes without understanding -- "Ultrathink this" - Question fundamentals, not just symptoms +- "Ultra-think this" - Question fundamentals, not just symptoms - "We're stuck?" (frustrated) - Your approach isn't working **When you see these:** STOP. Return to Phase 1. diff --git a/src-tauri/experts/skills/test-driven-development/SKILL.md b/src-tauri/experts/skills/test-driven-development/SKILL.md index 7a751fa9..60d2609c 100644 --- a/src-tauri/experts/skills/test-driven-development/SKILL.md +++ b/src-tauri/experts/skills/test-driven-development/SKILL.md @@ -356,7 +356,7 @@ Never fix bugs without a test. ## Testing Anti-Patterns -When adding mocks or test utilities, read @testing-anti-patterns.md to avoid common pitfalls: +When adding mocks or test utilities, read [testing-anti-patterns.md](testing-anti-patterns.md) to avoid common pitfalls: - Testing mock behavior instead of real behavior - Adding test-only methods to production classes - Mocking without understanding dependencies diff --git a/src-tauri/experts/skills/using-git-worktrees/SKILL.md b/src-tauri/experts/skills/using-git-worktrees/SKILL.md index 134d3714..212c5692 100644 --- a/src-tauri/experts/skills/using-git-worktrees/SKILL.md +++ b/src-tauri/experts/skills/using-git-worktrees/SKILL.md @@ -30,7 +30,7 @@ BRANCH=$(git branch --show-current) git rev-parse --show-superproject-working-tree 2>/dev/null ``` -**If `GIT_DIR != GIT_COMMON` (and not a submodule):** You are already in a linked worktree. 
Skip to Step 3 (Project Setup). Do NOT create another worktree. +**If `GIT_DIR != GIT_COMMON` (and not a submodule):** You are already in a linked worktree. Skip to Step 2 (Project Setup). Do NOT create another worktree. Report with branch state: - On a branch: "Already in isolated workspace at `<path>` on branch `<name>`." @@ -42,7 +42,7 @@ Has the user already indicated their worktree preference in your instructions? I > "Would you like me to set up an isolated worktree? It protects your current branch from changes." -Honor any existing declared preference without asking. If the user declines consent, work in place and skip to Step 3. +Honor any existing declared preference without asking. If the user declines consent, work in place and skip to Step 2. ## Step 1: Create Isolated Workspace @@ -50,7 +50,7 @@ Honor any existing declared preference without asking. If the user declines cons ### 1a. Native Worktree Tools (preferred) -The user has asked for an isolated workspace (Step 0 consent). Do you already have a way to create a worktree? It might be a tool with a name like `EnterWorktree`, `WorktreeCreate`, a `/worktree` command, or a `--worktree` flag. If you do, use it and skip to Step 3. +The user has asked for an isolated workspace (Step 0 consent). Do you already have a way to create a worktree? It might be a tool with a name like `EnterWorktree`, `WorktreeCreate`, a `/worktree` command, or a `--worktree` flag. If you do, use it and skip to Step 2. Native tools handle directory placement, branch creation, and cleanup automatically. Using `git worktree add` when you have a native tool creates phantom state your harness can't see or manage. @@ -73,14 +73,7 @@ Follow this priority order. Explicit user preference always beats observed files ``` If found, use it. If both exist, `.worktrees` wins. -3. 
**Check for an existing global directory:** - ```bash - project=$(basename "$(git rev-parse --show-toplevel)") - ls -d ~/.config/superpowers/worktrees/$project 2>/dev/null - ``` - If found, use it (backward compatibility with legacy global path). - -4. **If there is no other guidance available**, default to `.worktrees/` at the project root. +3. **If there is no other guidance available**, default to `.worktrees/` at the project root. #### Safety Verification (project-local directories only) @@ -94,16 +87,11 @@ git check-ignore -q .worktrees 2>/dev/null || git check-ignore -q worktrees 2>/d **Why critical:** Prevents accidentally committing worktree contents to repository. -Global directories (`~/.config/superpowers/worktrees/`) need no verification. - #### Create the Worktree ```bash -project=$(basename "$(git rev-parse --show-toplevel)") - # Determine path based on chosen location -# For project-local: path="$LOCATION/$BRANCH_NAME" -# For global: path="~/.config/superpowers/worktrees/$project/$BRANCH_NAME" +path="$LOCATION/$BRANCH_NAME" git worktree add "$path" -b "$BRANCH_NAME" cd "$path" @@ -111,7 +99,7 @@ cd "$path" **Sandbox fallback:** If `git worktree add` fails with a permission error (sandbox denial), tell the user the sandbox blocked worktree creation and you're working in the current directory instead. Then run setup and baseline tests in place. 
-## Step 3: Project Setup +## Step 2: Project Setup Auto-detect and run appropriate setup: @@ -130,7 +118,7 @@ if [ -f pyproject.toml ]; then poetry install; fi if [ -f go.mod ]; then go mod download; fi ``` -## Step 4: Verify Clean Baseline +## Step 3: Verify Clean Baseline Run tests to ensure workspace starts clean: @@ -163,7 +151,6 @@ Ready to implement <feature-name> | `worktrees/` exists | Use it (verify ignored) | | Both exist | Use `.worktrees/` | | Neither exists | Check instruction file, then default `.worktrees/` | -| Global path exists | Use it (backward compat) | | Directory not ignored | Add to .gitignore + commit | | Permission error on create | Sandbox fallback, work in place | | Tests fail during baseline | Report failures + ask | @@ -189,7 +176,7 @@ Ready to implement <feature-name> ### Assuming directory location - **Problem:** Creates inconsistency, violates project conventions -- **Fix:** Follow priority: existing > global legacy > instruction file > default +- **Fix:** Follow priority: explicit instructions > existing project-local directory > default ### Proceeding with failing tests @@ -209,7 +196,7 @@ Ready to implement <feature-name> **Always:** - Run Step 0 detection first - Prefer native tools over git fallback -- Follow directory priority: existing > global legacy > instruction file > default +- Follow directory priority: explicit instructions > existing project-local directory > default - Verify directory is ignored for project-local - Auto-detect and run project setup - Verify clean test baseline diff --git a/src-tauri/experts/skills/using-superpowers/SKILL.md b/src-tauri/experts/skills/using-superpowers/SKILL.md index c8a85702..53712217 100644 --- a/src-tauri/experts/skills/using-superpowers/SKILL.md +++ b/src-tauri/experts/skills/using-superpowers/SKILL.md @@ -1,6 +1,6 @@ --- name: using-superpowers -description: Use when starting any conversation - establishes how to find and use skills, requiring Skill tool invocation before ANY 
response including clarifying questions +description: Use when starting any conversation - establishes how to find and use skills, requiring skill invocation before ANY response including clarifying questions --- <SUBAGENT-STOP> @@ -27,9 +27,13 @@ If CLAUDE.md, GEMINI.md, or AGENTS.md says "don't use TDD" and a skill says "alw ## How to Access Skills -**In Claude Code:** Use the `Skill` tool. When you invoke a skill, its content is loaded and presented to you—follow it directly. Never use the Read tool on skill files. +**Never read skill files manually with file tools** — always use your platform's skill-loading mechanism so the skill is properly activated. -**In Copilot CLI:** Use the `skill` tool. Skills are auto-discovered from installed plugins. The `skill` tool works the same as Claude Code's `Skill` tool. +**In Claude Code:** Use the `Skill` tool. When you invoke a skill, its content is loaded and presented to you — follow it directly. + +**In Codex:** Skills load natively. Follow the instructions presented when a skill activates. + +**In Copilot CLI:** Use the `skill` tool. Skills are auto-discovered from installed plugins. **In Gemini CLI:** Skills activate via the `activate_skill` tool. Gemini loads skill metadata at session start and activates the full content on demand. @@ -37,7 +41,7 @@ If CLAUDE.md, GEMINI.md, or AGENTS.md says "don't use TDD" and a skill says "alw ## Platform Adaptation -Skills use Claude Code tool names. Non-CC platforms: see `references/copilot-tools.md` (Copilot CLI), `references/codex-tools.md` (Codex) for tool equivalents. Gemini CLI users get the tool mapping loaded automatically via GEMINI.md. +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file") rather than naming any one runtime's tools. 
For per-platform tool equivalents and instructions-file conventions, see [claude-code-tools.md](references/claude-code-tools.md), [codex-tools.md](references/codex-tools.md), [copilot-tools.md](references/copilot-tools.md), [gemini-tools.md](references/gemini-tools.md), [pi-tools.md](references/pi-tools.md), and [antigravity-tools.md](references/antigravity-tools.md). Gemini CLI users get the tool mapping loaded automatically via GEMINI.md. # Using Skills @@ -48,30 +52,30 @@ Skills use Claude Code tool names. Non-CC platforms: see `references/copilot-too ```dot digraph skill_flow { "User message received" [shape=doublecircle]; - "About to EnterPlanMode?" [shape=doublecircle]; + "About to enter plan mode?" [shape=doublecircle]; "Already brainstormed?" [shape=diamond]; "Invoke brainstorming skill" [shape=box]; "Might any skill apply?" [shape=diamond]; - "Invoke Skill tool" [shape=box]; + "Invoke the skill" [shape=box]; "Announce: 'Using [skill] to [purpose]'" [shape=box]; "Has checklist?" [shape=diamond]; - "Create TodoWrite todo per item" [shape=box]; + "Create a todo per item" [shape=box]; "Follow skill exactly" [shape=box]; "Respond (including clarifications)" [shape=doublecircle]; - "About to EnterPlanMode?" -> "Already brainstormed?"; + "About to enter plan mode?" -> "Already brainstormed?"; "Already brainstormed?" -> "Invoke brainstorming skill" [label="no"]; "Already brainstormed?" -> "Might any skill apply?" [label="yes"]; "Invoke brainstorming skill" -> "Might any skill apply?"; "User message received" -> "Might any skill apply?"; - "Might any skill apply?" -> "Invoke Skill tool" [label="yes, even 1%"]; + "Might any skill apply?" -> "Invoke the skill" [label="yes, even 1%"]; "Might any skill apply?" 
-> "Respond (including clarifications)" [label="definitely not"]; - "Invoke Skill tool" -> "Announce: 'Using [skill] to [purpose]'"; + "Invoke the skill" -> "Announce: 'Using [skill] to [purpose]'"; "Announce: 'Using [skill] to [purpose]'" -> "Has checklist?"; - "Has checklist?" -> "Create TodoWrite todo per item" [label="yes"]; + "Has checklist?" -> "Create a todo per item" [label="yes"]; "Has checklist?" -> "Follow skill exactly" [label="no"]; - "Create TodoWrite todo per item" -> "Follow skill exactly"; + "Create a todo per item" -> "Follow skill exactly"; } ``` @@ -98,15 +102,15 @@ These thoughts mean STOP—you're rationalizing: When multiple skills could apply, use this order: -1. **Process skills first** (brainstorming, debugging) - these determine HOW to approach the task +1. **Process skills first** (brainstorming, systematic-debugging) - these determine HOW to approach the task 2. **Implementation skills second** (frontend-design, mcp-builder) - these guide execution "Let's build X" → brainstorming first, then implementation skills. -"Fix this bug" → debugging first, then domain-specific skills. +"Fix this bug" → systematic-debugging first, then domain-specific skills. ## Skill Types -**Rigid** (TDD, debugging): Follow exactly. Don't adapt away discipline. +**Rigid** (TDD, systematic-debugging): Follow exactly. Don't adapt away discipline. **Flexible** (patterns): Adapt principles to context. diff --git a/src-tauri/experts/skills/using-superpowers/references/antigravity-tools.md b/src-tauri/experts/skills/using-superpowers/references/antigravity-tools.md new file mode 100644 index 00000000..b0d4fa1f --- /dev/null +++ b/src-tauri/experts/skills/using-superpowers/references/antigravity-tools.md @@ -0,0 +1,96 @@ +# Antigravity CLI (`agy`) Tool Mapping + +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On the Antigravity CLI (`agy`) these resolve to the tools below. 
+ +| Action skills request | Antigravity CLI equivalent | +|----------------------|----------------------| +| Read a file | `view_file` | +| Create a new file | `write_to_file` | +| Edit a file | `replace_file_content` | +| Edit a file in several places at once | `multi_replace_file_content` | +| Run a shell command | `run_command` | +| Search file contents | `grep_search` | +| Find files by name / list a directory | `list_dir` (no dedicated glob tool — combine `list_dir` with `grep_search`) | +| Fetch a URL | `read_url_content` | +| Search the web | `search_web` | +| Pose a structured question to your human partner | `ask_question` | +| Dispatch a subagent (`Subagent (general-purpose):` template) | `invoke_subagent` with a built-in `TypeName` — `self` for full-capability work, `research` for read-only (see [Subagent support](#subagent-support)) | +| Multiple parallel dispatches | Multiple entries in one `invoke_subagent` call's `Subagents` array | +| Task tracking ("create a todo", "mark complete") | a **task artifact** — `write_to_file` with `IsArtifact: true` and `ArtifactType: "task"` (see [Task tracking](#task-tracking)). **Not** `manage_task`, which manages background processes. | + +## Invoking a skill — read its `SKILL.md` + +Antigravity surfaces every installed skill's `name` + `description` to you at the +start of each session, but it has **no `Skill`/`activate_skill` tool**. To load a +skill, **read its `SKILL.md` with `view_file`, setting `IsSkillFile: true`** when +the skill applies — e.g. `view_file` on +`.../plugins/superpowers/skills/<skill-name>/SKILL.md` with `IsSkillFile: true`. +(`IsSkillFile` is agy's own signal that you're reading a file to *execute its +instructions*, not to edit or preview it — set it whenever you load a skill.) + +This is the blessed skill-loading mechanism on this harness. 
The general rule +"never read skill files manually" means "don't bypass your platform's +skill-loading mechanism" — and on Antigravity, reading `SKILL.md` *is* that +mechanism. Reading it honors the rule rather than breaking it. + +You already know which skills exist and what they're for: their names and +descriptions are in front of you at session start. When a description matches +what you're about to do, read that skill's `SKILL.md` before acting. + +## Subagent support + +Antigravity dispatches subagents with `invoke_subagent`, passing each one a +`TypeName` in the `Subagents` array. Two `TypeName`s are **built in** — use them +directly, no `define_subagent` needed: + +- **`self`** — a full clone of you, with every tool you have (including + `write_to_file`/`replace_file_content`/`run_command`). The safe default for + general-purpose work: implementing, fixing, anything that edits files or runs + commands. +- **`research`** — read-only (file reading, `grep_search`, web/URL fetch; no write + or command access). Use it when you specifically want a subagent that can't make + changes — investigation and read-only review. + +Call `define_subagent` only for a custom system prompt or capability mix: set +`enable_write_tools: true` to grant file edits **and** `run_command`, +`enable_subagent_tools` for nested dispatch, `enable_mcp_tools` for MCP. Then +invoke it by the name you gave it. (`manage_subagents` lists/kills running +subagents.) + +Skills dispatch with `Subagent (general-purpose):` and either reference a +prompt-template file (e.g. `superpowers:subagent-driven-development`'s +`./implementer-prompt.md`) or supply an inline prompt. 
On Antigravity: + +| Skill dispatch form | Antigravity equivalent | +|---------------------|----------------------| +| An implementer-style `*-prompt.md` template (writes code, runs tests) | Fill the template, then `invoke_subagent` with `TypeName: "self"` and the filled prompt | +| A read-only reviewer template (`task-reviewer`, `code-reviewer`, `requesting-code-review`'s `./code-reviewer.md`) | `invoke_subagent` with `TypeName: "research"` and the filled review template | +| Inline prompt (no template referenced) | `invoke_subagent` with `TypeName: "self"` (or `"research"` if the task only reads) and your inline prompt | + +### Prompt filling + +Skills provide prompt templates with placeholders like `{WHAT_WAS_IMPLEMENTED}` or +`[FULL TEXT of task]`. Fill all placeholders before passing the complete prompt to +`invoke_subagent`. The prompt template itself contains the agent's role, review +criteria, and expected output format — the subagent will follow it. + +### Parallel dispatch + +Put multiple entries in a single `invoke_subagent` call's `Subagents` array to run +independent subagent work in parallel. Keep dependent tasks sequential, but do not +serialize independent subagent tasks just to preserve a simpler history. + +## Task tracking + +Antigravity has **no todo / `TodoWrite` tool** (`manage_task` manages background +processes — `list`/`kill`/`status`/`send_input` — it is *not* a checklist). When a +skill says to create a todo list or track tasks, maintain a **task artifact**: a +markdown checklist saved with `write_to_file` (`IsArtifact: true`, +`ArtifactMetadata.ArtifactType: "task"`), edited with `replace_file_content` / +`multi_replace_file_content` as you go. + +At the start of any multi-step task, create the task artifact listing every step of +your plan. As you complete each step, edit the artifact to mark it done (`- [x]`). +If the plan changes, update the checklist. 
Keep it current — it is your source of +truth for what remains; once the conversation gets long, re-read it before starting +each step. diff --git a/src-tauri/experts/skills/using-superpowers/references/claude-code-tools.md b/src-tauri/experts/skills/using-superpowers/references/claude-code-tools.md new file mode 100644 index 00000000..7ddd549a --- /dev/null +++ b/src-tauri/experts/skills/using-superpowers/references/claude-code-tools.md @@ -0,0 +1,50 @@ +# Claude Code Tool Mapping + +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On Claude Code these resolve to the tools below. + +## Tools + +| Action skills request | Claude Code tool | +|----------------------|------------------| +| Read a file | `Read` | +| Create a new file | `Write` | +| Edit a file | `Edit` | +| Run a shell command | `Bash` | +| Search file contents | `Grep` | +| Find files by name | `Glob` | +| Fetch a URL | `WebFetch` | +| Search the web | `WebSearch` | +| Invoke a skill | `Skill` | +| Dispatch a subagent (`Subagent (general-purpose):` template) | `Agent` (older releases named this `Task`) | +| Multiple parallel dispatches | Multiple `Agent` calls in one response | +| Task tracking ("create a todo", "mark complete") | `TaskCreate`, `TaskUpdate`, `TaskList`, `TaskGet`; `TodoWrite` in `claude -p` / Agent SDK unless `CLAUDE_CODE_ENABLE_TASKS=1` is set | +| Background-process / subagent lifecycle (read output, cancel) | `TaskOutput`, `TaskStop` — these are distinct from the todo tools above and apply to running shells, agents, and remote sessions | + +## Instructions file + +When a skill mentions "your instructions file", on Claude Code this is **`CLAUDE.md`**. Claude Code walks up the directory tree from the current working directory and concatenates every `CLAUDE.md` and `CLAUDE.local.md` it finds along the way. 
Standard locations: + +| Scope | Location | +|-------|----------| +| Project (team-shared) | `./CLAUDE.md` or `./.claude/CLAUDE.md` | +| User global | `~/.claude/CLAUDE.md` | +| Local-private (gitignored) | `./CLAUDE.local.md` | +| Managed policy (org-wide) | `/Library/Application Support/ClaudeCode/CLAUDE.md` (macOS), `/etc/claude-code/CLAUDE.md` (Linux/WSL), `C:\Program Files\ClaudeCode\CLAUDE.md` (Windows) | + +CLAUDE.md files can pull in additional content with `@path/to/file` imports (relative or absolute, max five hops deep). Subdirectory `CLAUDE.md` files are also discovered automatically and loaded on demand when Claude Code reads files in those subdirectories. + +Claude Code does **not** read `AGENTS.md` directly. If a project already maintains `AGENTS.md` for other agents, import it from `CLAUDE.md` so both runtimes share the same instructions: + +```markdown +@AGENTS.md + +## Claude Code + +(Claude-Code-specific instructions go here.) +``` + +For path-scoped rules and larger-project organization, see `.claude/rules/` (rules can be scoped to specific files via `paths` frontmatter and load on demand). + +## Personal skills directory + +User-level skills live at **`~/.claude/skills/`**. Each skill is a subdirectory containing a `SKILL.md` (with `name` and `description` frontmatter) plus any supporting files. Claude Code does not currently recognize the cross-runtime `~/.agents/skills/` path that Codex, Copilot CLI, and Gemini CLI read; if you need cross-runtime support, verify the current behavior against the [official skills docs](https://code.claude.com/docs/en/skills).
diff --git a/src-tauri/experts/skills/using-superpowers/references/codex-tools.md b/src-tauri/experts/skills/using-superpowers/references/codex-tools.md index f50d40d4..1ab253fd 100644 --- a/src-tauri/experts/skills/using-superpowers/references/codex-tools.md +++ b/src-tauri/experts/skills/using-superpowers/references/codex-tools.md @@ -1,17 +1,30 @@ # Codex Tool Mapping -Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent: - -| Skill references | Codex equivalent | -|-----------------|------------------| -| `Task` tool (dispatch subagent) | `spawn_agent` (see [Subagent dispatch requires multi-agent support](#subagent-dispatch-requires-multi-agent-support)) | -| Multiple `Task` calls (parallel) | Multiple `spawn_agent` calls | -| Task returns result | `wait_agent` | -| Task completes automatically | `close_agent` to free slot | -| `TodoWrite` (task tracking) | `update_plan` | -| `Skill` tool (invoke a skill) | Skills load natively — just follow the instructions | -| `Read`, `Write`, `Edit` (files) | Use your native file tools | -| `Bash` (run commands) | Use your native shell tools | +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On Codex these resolve to the tools below. 
+ +| Action skills request | Codex equivalent | +|----------------------|------------------| +| Read a file | `shell` (e.g., `cat`, `head`, `tail`) — Codex reads files via shell | +| Create / edit / delete a file | `apply_patch` (structured diff for create, update, delete) | +| Run a shell command | `shell` | +| Search file contents | `shell` (e.g., `grep`, `rg`) | +| Find files by name | `shell` (e.g., `find`, `ls`) | +| Fetch a URL | `shell` with `curl` / `wget` — Codex has no native fetch tool | +| Search the web | `web_search` (enabled by default; configurable in `config.toml` via the top-level `web_search` setting — `live`, `cached`, or `disabled`) | +| Invoke a skill | Skills load natively — just follow the instructions | +| Dispatch a subagent (`Subagent (general-purpose):` template) | `spawn_agent` (see [Subagent dispatch requires multi-agent support](#subagent-dispatch-requires-multi-agent-support)) | +| Multiple parallel dispatches | Multiple `spawn_agent` calls in one response | +| Wait for subagent result | `wait_agent` | +| Free up subagent slot when done | `close_agent` | +| Task tracking ("create a todo", "mark complete") | `update_plan` | + +## Instructions file + +When a skill mentions "your instructions file", on Codex this is **`AGENTS.md`** at the project root. Codex also reads `~/.codex/AGENTS.md` for global context, and an `AGENTS.override.md` (in the project tree or `~/.codex/`) takes precedence when present. Codex walks from the project root down to the current working directory, concatenating `AGENTS.md` files it finds along the way, up to `project_doc_max_bytes` (32 KiB by default). + +## Personal skills directory + +User-level skills live at **`$CODEX_HOME/skills/`** (default `~/.codex/skills/`). Codex also reads the cross-runtime path **`~/.agents/skills/`** (shared with Copilot CLI and Gemini CLI). 
When both directories exist at the same scope, Codex loads them both as separate skill catalogs — Codex's docs don't currently document a precedence between them. Each skill is a subdirectory containing a `SKILL.md` (with `name` and `description` frontmatter). ## Subagent dispatch requires multi-agent support diff --git a/src-tauri/experts/skills/using-superpowers/references/copilot-tools.md b/src-tauri/experts/skills/using-superpowers/references/copilot-tools.md index ae3cf5a6..2cf54a0d 100644 --- a/src-tauri/experts/skills/using-superpowers/references/copilot-tools.md +++ b/src-tauri/experts/skills/using-superpowers/references/copilot-tools.md @@ -1,31 +1,38 @@ # Copilot CLI Tool Mapping -Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent: - -| Skill references | Copilot CLI equivalent | -|-----------------|----------------------| -| `Read` (file reading) | `view` | -| `Write` (file creation) | `create` | -| `Edit` (file editing) | `edit` | -| `Bash` (run commands) | `bash` | -| `Grep` (search file content) | `grep` | -| `Glob` (search files by name) | `glob` | -| `Skill` tool (invoke a skill) | `skill` | -| `WebFetch` | `web_fetch` | -| `Task` tool (dispatch subagent) | `task` with `agent_type: "general-purpose"` or `"explore"` | -| Multiple `Task` calls (parallel) | Multiple `task` calls | -| Task status/output | `read_agent`, `list_agents` | -| `TodoWrite` (task tracking) | `sql` with built-in `todos` table | -| `WebSearch` | No equivalent — use `web_fetch` with a search engine URL | -| `EnterPlanMode` / `ExitPlanMode` | No equivalent — stay in the main session | +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On Copilot CLI these resolve to the tools below. 
+ +| Action skills request | Copilot CLI equivalent | +|----------------------|----------------------| +| Read a file | `view` | +| Create / edit / delete a file | `apply_patch` (Copilot CLI has no separate create/edit/write tools) | +| Run a shell command | `bash` | +| Search file contents | `rg` (ripgrep; Copilot CLI does not expose a `grep` tool) | +| Find files by name | `glob` | +| Fetch a URL | `web_fetch` | +| Search the web | `web_search` | +| Invoke a skill | `skill` | +| Dispatch a subagent (`Subagent (general-purpose):` template) | `task` with `agent_type: "general-purpose"` (other accepted types: `explore`, `task`, `code-review`, `research`, `configure-copilot`) | +| Multiple parallel dispatches | Multiple `task` calls in one response | +| Subagent status/output/control | `read_agent`, `list_agents`, `write_agent` | +| Task tracking ("create a todo", "mark complete") | `update_todo` | +| Enter / exit plan mode | No equivalent — stay in the main session | + +## Instructions file + +When a skill mentions "your instructions file", on Copilot CLI this is **`AGENTS.md`** at the repository root. If both `AGENTS.md` and `.github/copilot-instructions.md` are present, Copilot reads both. + +## Personal skills directory + +User-level skills live at **`~/.copilot/skills/`**. Copilot CLI also recognizes the cross-runtime alias **`~/.agents/skills/`**, which is shared with Codex and Gemini CLI. Each skill is a subdirectory containing a `SKILL.md` (with `name` and `description` frontmatter). 
## Async shell sessions -Copilot CLI supports persistent async shell sessions, which have no direct Claude Code equivalent: +Copilot CLI supports persistent async shell sessions: | Tool | Purpose | |------|---------| -| `bash` with `async: true` | Start a long-running command in the background | +| `bash` with `mode: "async"` (and optionally `detach: true`) | Start a long-running command in the background; returns a `shellId` | | `write_bash` | Send input to a running async session | | `read_bash` | Read output from an async session | | `stop_bash` | Terminate an async session | diff --git a/src-tauri/experts/skills/using-superpowers/references/gemini-tools.md b/src-tauri/experts/skills/using-superpowers/references/gemini-tools.md index 91ef4049..b01b6523 100644 --- a/src-tauri/experts/skills/using-superpowers/references/gemini-tools.md +++ b/src-tauri/experts/skills/using-superpowers/references/gemini-tools.md @@ -1,51 +1,63 @@ # Gemini CLI Tool Mapping -Skills use Claude Code tool names. When you encounter these in a skill, use your platform equivalent: - -| Skill references | Gemini CLI equivalent | -|-----------------|----------------------| -| `Read` (file reading) | `read_file` | -| `Write` (file creation) | `write_file` | -| `Edit` (file editing) | `replace` | -| `Bash` (run commands) | `run_shell_command` | -| `Grep` (search file content) | `grep_search` | -| `Glob` (search files by name) | `glob` | -| `TodoWrite` (task tracking) | `write_todos` | -| `Skill` tool (invoke a skill) | `activate_skill` | -| `WebSearch` | `google_web_search` | -| `WebFetch` | `web_fetch` | -| `Task` tool (dispatch subagent) | `@agent-name` (see [Subagent support](#subagent-support)) | +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On Gemini CLI these resolve to the tools below. 
+ +| Action skills request | Gemini CLI equivalent | +|----------------------|----------------------| +| Read a file | `read_file` | +| Read multiple files at once | `read_many_files` | +| Create a new file | `write_file` | +| Edit a file | `replace` | +| Run a shell command | `run_shell_command` | +| Search file contents | `grep_search` | +| Find files by name | `glob` | +| List files and subdirectories | `list_directory` | +| Fetch a URL | `web_fetch` | +| Search the web | `google_web_search` | +| Invoke a skill | `activate_skill` | +| Dispatch a subagent (`Subagent (general-purpose):` template) | `invoke_agent` with `agent_name: "generalist"` (invocable via `@generalist` chat syntax — see [Subagent support](#subagent-support)) | +| Multiple parallel dispatches | Multiple `invoke_agent` calls in the same response | +| Task tracking ("create a todo", "mark complete") | `write_todos` (statuses: pending, in_progress, completed, cancelled, blocked) | + +## Instructions file + +When a skill mentions "your instructions file", on Gemini CLI this is **`GEMINI.md`**. Gemini CLI loads `GEMINI.md` hierarchically: global at `~/.gemini/GEMINI.md`, project-level files in workspace directories and their ancestors, and sub-directory `GEMINI.md` files when a tool accesses files in those directories. + +## Personal skills directory + +User-level skills live at **`~/.gemini/skills/`**, with **`~/.agents/skills/`** as a cross-runtime alias (shared with Codex and Copilot CLI). When both directories exist at the same scope, `.agents/skills/` takes precedence. Each skill is a subdirectory containing a `SKILL.md` (with `name` and `description` frontmatter). ## Subagent support -Gemini CLI supports subagents natively via the `@` syntax. Use the built-in `@generalist` agent to dispatch any task — it has access to all tools and follows the prompt you provide. +Gemini CLI dispatches subagents through the `invoke_agent` tool, which takes `agent_name` and `prompt` parameters. 
The same dispatch is also surfaced as a chat-syntax shortcut: typing `@generalist <prompt>` is equivalent to calling `invoke_agent` with `agent_name: "generalist"`. Built-in agent names include `generalist`, `cli_help`, `codebase_investigator`, and (with browser tooling enabled) `browser_agent`. -When a skill says to dispatch a named agent type, use `@generalist` with the full prompt from the skill's prompt template: +Skills dispatch with `Subagent (general-purpose):` and either reference a prompt-template file (e.g., `superpowers:subagent-driven-development`'s `./implementer-prompt.md`) or supply an inline prompt. On Gemini CLI: -| Skill instruction | Gemini CLI equivalent | -|-------------------|----------------------| -| `Task tool (superpowers:implementer)` | `@generalist` with the filled `implementer-prompt.md` template | -| `Task tool (superpowers:spec-reviewer)` | `@generalist` with the filled `spec-reviewer-prompt.md` template | -| `Task tool (superpowers:code-reviewer)` | `@code-reviewer` (bundled agent) or `@generalist` with the filled review prompt | -| `Task tool (superpowers:code-quality-reviewer)` | `@generalist` with the filled `code-quality-reviewer-prompt.md` template | -| `Task tool (general-purpose)` with inline prompt | `@generalist` with your inline prompt | +| Skill dispatch form | Gemini CLI equivalent | +|---------------------|----------------------| +| References a `*-prompt.md` template (implementer, task-reviewer, code-reviewer, etc.) | Fill the template, then `invoke_agent` with `agent_name: "generalist"` and the filled prompt | +| References `superpowers:requesting-code-review`'s `./code-reviewer.md` | `invoke_agent` with `agent_name: "generalist"` and the filled review template | +| Inline prompt (no template referenced) | `invoke_agent` with `agent_name: "generalist"` and your inline prompt | ### Prompt filling -Skills provide prompt templates with placeholders like `{WHAT_WAS_IMPLEMENTED}` or `[FULL TEXT of task]`. 
Fill all placeholders and pass the complete prompt as the message to `@generalist`. The prompt template itself contains the agent's role, review criteria, and expected output format — `@generalist` will follow it. +Skills provide prompt templates with placeholders like `{WHAT_WAS_IMPLEMENTED}` or `[FULL TEXT of task]`. Fill all placeholders before passing the complete prompt to `invoke_agent`. The prompt template itself contains the agent's role, review criteria, and expected output format — the subagent will follow it. ### Parallel dispatch -Gemini CLI supports parallel subagent dispatch. When a skill asks you to dispatch multiple independent subagent tasks in parallel, request all of those `@generalist` or named subagent tasks together in the same prompt. Keep dependent tasks sequential, but do not serialize independent subagent tasks just to preserve a simpler history. +Gemini CLI supports parallel subagent dispatch. Issue multiple `invoke_agent` calls in the same response (or multiple `@generalist` invocations in one prompt) to run independent subagent work in parallel. Keep dependent tasks sequential, but do not serialize independent subagent tasks just to preserve a simpler history. 
## Additional Gemini CLI tools -These tools are available in Gemini CLI but have no Claude Code equivalent: +These tools are unique to Gemini CLI: | Tool | Purpose | |------|---------| -| `list_directory` | List files and subdirectories | -| `save_memory` | Persist facts to GEMINI.md across sessions | -| `ask_user` | Request structured input from the user | -| `tracker_create_task` | Rich task management (create, update, list, visualize) | -| `enter_plan_mode` / `exit_plan_mode` | Switch to read-only research mode before making changes | +| `save_memory` (legacy) | Persist facts across sessions when `experimental.memoryV2 = false` | +| `get_internal_docs` | Look up Gemini CLI's bundled documentation | +| `ask_user` | Pose structured questions to the user (text / single-select / multi-select) | +| `enter_plan_mode` / `exit_plan_mode` | Switch into and out of read-only plan mode | +| `update_topic` | Update the current conversation's topic / strategic-intent metadata | +| `complete_task` | Signal that a Gemini subagent has completed and return its result to the parent agent | +| `tracker_create_task`, `tracker_update_task`, `tracker_get_task`, `tracker_list_tasks`, `tracker_add_dependency`, `tracker_visualize` | Rich task tracker with dependency and visualization support | +| `read_mcp_resource`, `list_mcp_resources` | MCP resource access | diff --git a/src-tauri/experts/skills/using-superpowers/references/pi-tools.md b/src-tauri/experts/skills/using-superpowers/references/pi-tools.md new file mode 100644 index 00000000..04889cba --- /dev/null +++ b/src-tauri/experts/skills/using-superpowers/references/pi-tools.md @@ -0,0 +1,28 @@ +# Pi Tool Mapping + +Skills speak in actions ("dispatch a subagent", "create a todo", "read a file"). On Pi these resolve to the tools below. 
+ +| Action skills request | Pi equivalent | +| --- | --- | +| Invoke a skill | Pi native skills: load the relevant `SKILL.md` with `read`, or let the human use `/skill:name` | +| Read a file | `read` | +| Create a file | `write` | +| Edit a file | `edit` | +| Run a shell command | `bash` | +| Search file contents | `grep` when active; otherwise `bash` with `rg`/`grep` | +| Find files by name | `find` or `bash` with shell globs | +| List files and subdirectories | `ls` when active; otherwise `bash` with `ls` | +| Dispatch a subagent (`Subagent (general-purpose):` template) | Use an installed subagent tool such as `subagent` from `pi-subagents` if available | +| Task tracking ("create a todo", "mark complete") | Use an installed todo/task tool if available, otherwise track tasks in the plan or `TODO.md` | + +## Skills + +Pi discovers skills from configured skill directories and installed Pi packages. A Superpowers Pi package should expose `skills/` through its `pi.skills` manifest entry. Pi does not expose Claude Code's `Skill` tool, but the agent should still follow the Superpowers rule: when a skill applies, load and follow it before responding. + +## Subagents + +Pi core does not ship a standard subagent tool. The `pi-subagents` package is a strong optional companion and provides a `subagent` tool with single-agent, chain, parallel, async, forked-context, and resume/status workflows. If no subagent tool is available, do not fabricate `Task` calls; execute sequentially in the current session or explain that the optional subagent capability is not installed. + +## Task lists + +Pi core does not ship a standard task-list tool. If a todo/task extension is installed, use its documented tool. Otherwise use Superpowers plan files, checklists in Markdown, or a repo-local `TODO.md` for task tracking. Older Superpowers docs may refer to `TodoWrite`; treat that as the task-tracking action above. 
diff --git a/src-tauri/experts/skills/writing-plans/SKILL.md b/src-tauri/experts/skills/writing-plans/SKILL.md index 847412ec..b1613eb0 100644 --- a/src-tauri/experts/skills/writing-plans/SKILL.md +++ b/src-tauri/experts/skills/writing-plans/SKILL.md @@ -33,6 +33,15 @@ Before defining tasks, map out which files will be created or modified and what This structure informs the task decomposition. Each task should produce self-contained changes that make sense independently. +## Task Right-Sizing + +A task is the smallest unit that carries its own test cycle and is worth a +fresh reviewer's gate. When drawing task boundaries: fold setup, +configuration, scaffolding, and documentation steps into the task whose +deliverable needs them; split only where a reviewer could meaningfully +reject one task while approving its neighbor. Each task ends with an +independently testable deliverable. + ## Bite-Sized Task Granularity **Each step is one action (2-5 minutes):** @@ -57,6 +66,13 @@ This structure informs the task decomposition. Each task should produce self-con **Tech Stack:** [Key technologies/libraries] +## Global Constraints + +[The spec's project-wide requirements — version floors, dependency limits, +naming and copy rules, platform requirements — one line each, with exact +values copied verbatim from the spec. Every task's requirements implicitly +include this section.] + --- ``` @@ -70,6 +86,12 @@ This structure informs the task decomposition. Each task should produce self-con - Modify: `exact/path/to/existing.py:123-145` - Test: `tests/exact/path/to/test.py` +**Interfaces:** +- Consumes: [what this task uses from earlier tasks — exact signatures] +- Produces: [what later tasks rely on — exact function names, parameter + and return types. A task's implementer sees only their own task; this + block is how they learn the names and types neighboring tasks use.] 
+ - [ ] **Step 1: Write the failing test** ```python diff --git a/src-tauri/experts/skills/writing-plans/plan-document-reviewer-prompt.md b/src-tauri/experts/skills/writing-plans/plan-document-reviewer-prompt.md index 2db28067..1c12c1d6 100644 --- a/src-tauri/experts/skills/writing-plans/plan-document-reviewer-prompt.md +++ b/src-tauri/experts/skills/writing-plans/plan-document-reviewer-prompt.md @@ -7,7 +7,7 @@ Use this template when dispatching a plan document reviewer subagent. **Dispatch after:** The complete plan is written. ``` -Task tool (general-purpose): +Subagent (general-purpose): description: "Review plan document" prompt: | You are a plan document reviewer. Verify this plan is complete and ready for implementation. diff --git a/src-tauri/experts/skills/writing-skills/SKILL.md b/src-tauri/experts/skills/writing-skills/SKILL.md index c3b73d8b..8928d449 100644 --- a/src-tauri/experts/skills/writing-skills/SKILL.md +++ b/src-tauri/experts/skills/writing-skills/SKILL.md @@ -9,7 +9,7 @@ description: Use when creating new skills, editing existing skills, or verifying **Writing skills IS Test-Driven Development applied to process documentation.** -**Personal skills live in agent-specific directories (`~/.claude/skills` for Claude Code, `~/.agents/skills/` for Codex)** +**Personal skills live in your runtime's skills directory** — see [claude-code-tools.md](../using-superpowers/references/claude-code-tools.md), [codex-tools.md](../using-superpowers/references/codex-tools.md), [copilot-tools.md](../using-superpowers/references/copilot-tools.md), or [gemini-tools.md](../using-superpowers/references/gemini-tools.md) for the path on your runtime. Codex, Copilot CLI, and Gemini CLI all also recognize `~/.agents/skills/` as a cross-runtime alias. You write test cases (pressure scenarios with subagents), watch them fail (baseline behavior), write the skill (documentation), watch tests pass (agents comply), and refactor (close loopholes). 
@@ -21,7 +21,7 @@ You write test cases (pressure scenarios with subagents), watch them fail (basel ## What is a Skill? -A **skill** is a reference guide for proven techniques, patterns, or tools. Skills help future Claude instances find and apply effective approaches. +A **skill** is a reference guide for proven techniques, patterns, or tools. Skills help future agents find and apply effective approaches. **Skills are:** Reusable techniques, patterns, tools, reference guides @@ -55,7 +55,7 @@ The entire skill creation process follows RED-GREEN-REFACTOR. **Don't create for:** - One-off solutions - Standard practices well-documented elsewhere -- Project-specific conventions (put in CLAUDE.md) +- Project-specific conventions (put in your instructions file) - Mechanical constraints (if it's enforceable with regex/validation, automate it—save documentation for judgment calls) ## Skill Types @@ -99,7 +99,7 @@ skills/ - `description`: Third-person, describes ONLY when to use (NOT what it does) - Start with "Use when..." to focus on triggering conditions - Include specific symptoms, situations, and contexts - - **NEVER summarize the skill's process or workflow** (see CSO section for why) + - **NEVER summarize the skill's process or workflow** (see SDO section for why) - Keep under 500 characters if possible ```markdown @@ -137,13 +137,13 @@ Concrete results ``` -## Claude Search Optimization (CSO) +## Skill Discovery Optimization (SDO) -**Critical for discovery:** Future Claude needs to FIND your skill +**Critical for discovery:** Future agents need to FIND your skill ### 1. Rich Description Field -**Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?" +**Purpose:** Your agent reads the description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?" **Format:** Start with "Use when..." 
to focus on triggering conditions @@ -151,14 +151,14 @@ Concrete results The description should ONLY describe triggering conditions. Do NOT summarize the skill's process or workflow in the description. -**Why this matters:** Testing revealed that when a description summarizes the skill's workflow, Claude may follow the description instead of reading the full skill content. A description saying "code review between tasks" caused Claude to do ONE review, even though the skill's flowchart clearly showed TWO reviews (spec compliance then code quality). +**Why this matters:** Testing revealed that when a description summarizes the skill's workflow, an agent may follow the description instead of reading the full skill content. A description saying "code review between tasks" caused an agent to do ONE review, even though the skill's flowchart clearly showed TWO reviews (spec compliance then code quality). -When the description was changed to just "Use when executing implementation plans with independent tasks" (no workflow summary), Claude correctly read the flowchart and followed the two-stage review process. +When the description was changed to just "Use when executing implementation plans with independent tasks" (no workflow summary), the agent correctly read the flowchart and followed the two-stage review process. -**The trap:** Descriptions that summarize workflow create a shortcut Claude will take. The skill body becomes documentation Claude skips. +**The trap:** Descriptions that summarize workflow create a shortcut agents will take. The skill body becomes documentation agents skip. 
```yaml -# ❌ BAD: Summarizes workflow - Claude may follow this instead of reading skill +# ❌ BAD: Summarizes workflow - agents may follow this instead of reading skill description: Use when executing plans - dispatches subagent per task with code review between tasks # ❌ BAD: Too much process detail @@ -198,7 +198,7 @@ description: Use when using React Router and handling authentication redirects ### 2. Keyword Coverage -Use words Claude would search for: +Use words an agent would search for: - Error messages: "Hook timed out", "ENOTEMPTY", "race condition" - Symptoms: "flaky", "hanging", "zombie", "pollution" - Synonyms: "timeout/hang/freeze", "cleanup/teardown/afterEach" @@ -275,7 +275,7 @@ wc -w skills/path/SKILL.md - `creating-skills`, `testing-skills`, `debugging-with-logs` - Active, describes the action you're taking -### 4. Cross-Referencing Other Skills +### 5. Cross-Referencing Other Skills **When writing documentation that references other skills:** @@ -313,7 +313,7 @@ digraph when_flowchart { - Linear instructions → Numbered lists - Labels without semantic meaning (step1, helper2) -See @graphviz-conventions.dot for graphviz style rules. +See `graphviz-conventions.dot` in this directory for graphviz style rules. **Visualizing for your human partner:** Use `render-graphs.js` in this directory to render a skill's flowcharts to SVG: ```bash @@ -456,10 +456,29 @@ Different skill types need different test approaches: **All of these mean: Test before deploying. No exceptions.** +## Match the Form to the Failure + +Before writing guidance, classify the baseline failure. The form that bulletproofs one failure type measurably backfires on another. 
+ +| Baseline failure | Right form | Wrong form | +|---|---|---| +| Skips/violates a rule under pressure (knows better, does it anyway) | Prohibition + rationalization table + red flags (see Bulletproofing below) | Soft guidance ("prefer...", "consider...") | +| Complies, but output has the wrong shape (bloated prompt, buried verdict, restated spec) | Positive recipe or contract: state what the output IS — its parts, in order | Prohibition list ("don't restate", "never narrate") | +| Omits a required element from something they already produce | Structural: REQUIRED field or slot in the template they fill in | Prose reminders near the template | +| Behavior should depend on a condition | Conditional keyed to an observable predicate ("if the brief exists, reference it") | Unconditional rule + exemption clauses | + +**Why prohibitions backfire on shaping problems:** under a competing incentive ("make the prompt self-contained"), agents negotiate with "don't X". In head-to-head wording tests on dispatch-prompt guidance, the prohibition arm produced clearly more of the unwanted content than the recipe arm (fully separated distributions), and trended worse than even the no-guidance control — micro-test your own case rather than assuming, but never reach for the prohibition by default. A recipe leaves nothing to negotiate: the output matches the stated shape or it doesn't. + +**Rules for whichever form you pick:** +- **No nuance clauses.** "Don't X unless it matters" reopens the negotiation — appending a single nuance clause to a winning recipe degraded it from consistent to noisy in the same wording tests. Express a real exception as its own conditional on an observable predicate. +- **Exemption clauses don't scope.** "This limit doesn't apply to code blocks" still suppresses code blocks. If part of the output must be exempt, restructure so the rule can't reach it. 
+ ## Bulletproofing Skills Against Rationalization Skills that enforce discipline (like TDD) need to resist rationalization. Agents are smart and will find loopholes when under pressure. +**Scope:** this toolkit is for discipline failures — an agent that knows the rule and skips it under pressure. For wrong-shaped output or omitted elements, prohibition-based bulletproofing backfires; use the forms in Match the Form to the Failure instead. + **Psychology note:** Understanding WHY persuasion techniques work helps you apply them systematically. See persuasion-principles.md for research foundation (Cialdini, 2021; Meincke et al., 2025) on authority, commitment, scarcity, social proof, and unity principles. ### Close Every Loophole Explicitly @@ -522,7 +541,7 @@ Make it easy for agents to self-check when rationalizing: **All of these mean: Delete code. Start over with TDD.** ``` -### Update CSO for Violation Symptoms +### Update SDO for Violation Symptoms Add to description: symptoms of when you're ABOUT to violate the rule: @@ -553,7 +572,19 @@ Run same scenarios WITH skill. Agent should now comply. Agent found new rationalization? Add explicit counter. Re-test until bulletproof. -**Testing methodology:** See @testing-skills-with-subagents.md for the complete testing methodology: +### Micro-Test Wording Before Full Scenarios + +Full pressure-scenario runs are the final gate, but they are slow and expensive per iteration. Verify the wording itself first with micro-tests: + +1. **One fresh-context sample per call** — a raw API call, or a single-shot subagent if you don't have API access. System prompt = the realistic context the guidance will live in (the full skill or prompt template, not the guidance in isolation); user message = a task that tempts the failure. +2. **Always include a no-guidance control.** If the control doesn't exhibit the failure, there is nothing to fix — stop, don't author the guidance. +3. **5+ reps per variant.** Single samples lie. +4. 
**Manually read every flagged match.** Score programmatically if you like, but template echoes and quoted counter-examples masquerade as hits; automated counts alone overstate both failure and success. +5. **Variance is a metric.** When guidance lands, reps converge on the same shape. Five different interpretations across five reps mean the wording isn't binding — tighten the form before adding words. + +Micro-tests verify wording; they do not replace pressure scenarios for discipline skills. + +**Testing methodology:** See [testing-skills-with-subagents.md](testing-skills-with-subagents.md) for the complete testing methodology: - How to write pressure scenarios - Pressure types (time, sunk cost, authority, exhaustion) - Plugging holes systematically @@ -595,7 +626,7 @@ Deploying untested skills = deploying untested code. It's a violation of quality ## Skill Creation Checklist (TDD Adapted) -**IMPORTANT: Use TodoWrite to create todos for EACH checklist item below.** +**IMPORTANT: Create a todo for EACH checklist item below.** **RED Phase - Write Failing Test:** - [ ] Create pressure scenarios (3+ combined pressures for discipline skills) @@ -610,6 +641,8 @@ Deploying untested skills = deploying untested code. It's a violation of quality - [ ] Keywords throughout for search (errors, symptoms, tools) - [ ] Clear overview with core principle - [ ] Address specific baseline failures identified in RED +- [ ] Guidance form matches the failure type (see Match the Form to the Failure) +- [ ] For behavior-shaping guidance: wording micro-tested against a no-guidance control (5+ reps, every flagged match read manually) — N/A for pure reference skills - [ ] Code inline OR link to separate file - [ ] One excellent example (not multi-language) - [ ] Run scenarios WITH skill - verify agents now comply @@ -634,9 +667,10 @@ Deploying untested skills = deploying untested code.
It's a violation of quality ## Discovery Workflow -How future Claude finds your skill: +How future agents find your skill: 1. **Encounters problem** ("tests are flaky") +2. **Searches skills** (greps descriptions, browses categories) 3. **Finds SKILL** (description matches) 4. **Scans overview** (is this relevant?) 5. **Reads patterns** (quick reference table) diff --git a/src-tauri/experts/skills/writing-skills/anthropic-best-practices.md b/src-tauri/experts/skills/writing-skills/anthropic-best-practices.md index 9f3f6ecf..15ea9eae 100644 --- a/src-tauri/experts/skills/writing-skills/anthropic-best-practices.md +++ b/src-tauri/experts/skills/writing-skills/anthropic-best-practices.md @@ -1,30 +1,30 @@ # Skill authoring best practices -> Learn how to write effective Skills that Claude can discover and use successfully. +> Learn how to write effective Skills that agents can discover and use successfully. -Good Skills are concise, well-structured, and tested with real usage. This guide provides practical authoring decisions to help you write Skills that Claude can discover and use effectively. +Good Skills are concise, well-structured, and tested with real usage. This guide provides practical authoring decisions to help you write Skills that agents can discover and use effectively. -For conceptual background on how Skills work, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview). +For conceptual background on how Skills work, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview). ## Core principles ### Concise is key -The [context window](https://platform.claude.com/docs/en/build-with-claude/context-windows) is a public good. Your Skill shares the context window with everything else Claude needs to know, including: +The [context window](https://platform.claude.com/docs/en/build-with-claude/context-windows) is a public good. 
Your Skill shares the context window with everything else your agent needs to know, including: * The system prompt * Conversation history * Other Skills' metadata * Your actual request -Not every token in your Skill has an immediate cost. At startup, only the metadata (name and description) from all Skills is pre-loaded. Claude reads SKILL.md only when the Skill becomes relevant, and reads additional files only as needed. However, being concise in SKILL.md still matters: once Claude loads it, every token competes with conversation history and other context. +Not every token in your Skill has an immediate cost. At startup, only the metadata (name and description) from all Skills is pre-loaded. Agents read SKILL.md only when the Skill becomes relevant, and read additional files only as needed. However, being concise in SKILL.md still matters: once an agent loads it, every token competes with conversation history and other context. -**Default assumption**: Claude is already very smart +**Default assumption**: Agents are already very smart -Only add context Claude doesn't already have. Challenge each piece of information: +Only add context agents don't already have. Challenge each piece of information: -* "Does Claude really need this explanation?" -* "Can I assume Claude knows this?" +* "Does the agent really need this explanation?" +* "Can I assume the agent knows this?" * "Does this paragraph justify its token cost?" **Good example: Concise** (approximately 50 tokens): @@ -54,7 +54,7 @@ recommend pdfplumber because it's easy to use and handles most cases well. First, you'll need to install it using pip. Then you can use the code below... ``` -The concise version assumes Claude knows what PDFs are and how libraries work. +The concise version assumes the agent knows what PDFs are and how libraries work. ### Set appropriate degrees of freedom @@ -124,10 +124,10 @@ python scripts/migrate.py --verify --backup Do not modify the command or add additional flags. 
```` -**Analogy**: Think of Claude as a robot exploring a path: +**Analogy**: Think of the agent as a robot exploring a path: * **Narrow bridge with cliffs on both sides**: There's only one safe way forward. Provide specific guardrails and exact instructions (low freedom). Example: database migrations that must run in exact sequence. -* **Open field with no hazards**: Many paths lead to success. Give general direction and trust Claude to find the best route (high freedom). Example: code reviews where context determines the best approach. +* **Open field with no hazards**: Many paths lead to success. Give general direction and trust the agent to find the best route (high freedom). Example: code reviews where context determines the best approach. ### Test with all models you plan to use @@ -149,7 +149,7 @@ What works perfectly for Opus might need more detail for Haiku. If you plan to u * `name` - Human-readable name of the Skill (64 characters maximum) * `description` - One-line description of what the Skill does and when to use it (1024 characters maximum) - For complete Skill structure details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure). + For complete Skill structure details, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#skill-structure). </Note> ### Naming conventions @@ -196,7 +196,7 @@ The `description` field enables Skill discovery and should include both what the **Be specific and include key terms**. Include both what the Skill does and specific triggers/contexts for when to use it. -Each Skill has exactly one description field. The description is critical for skill selection: Claude uses it to choose the right Skill from potentially 100+ available Skills. Your description must provide enough detail for Claude to know when to select this Skill, while the rest of SKILL.md provides the implementation details. +Each Skill has exactly one description field. 
The description is critical for skill selection: agents use it to choose the right Skill from potentially 100+ available Skills. Your description must provide enough detail for an agent to know when to select this Skill, while the rest of SKILL.md provides the implementation details. Effective examples: @@ -234,7 +234,7 @@ description: Does stuff with files ### Progressive disclosure patterns -SKILL.md serves as an overview that points Claude to detailed materials as needed, like a table of contents in an onboarding guide. For an explanation of how progressive disclosure works, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the overview. +SKILL.md serves as an overview that points agents to detailed materials as needed, like a table of contents in an onboarding guide. For an explanation of how progressive disclosure works, see [How Skills work](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work) in the overview. 
**Practical guidance:** @@ -248,7 +248,7 @@ A basic Skill starts with just a SKILL.md file containing metadata and instructi <img src="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=87782ff239b297d9a9e8e1b72ed72db9" alt="Simple SKILL.md file showing YAML frontmatter and markdown body" data-og-width="2048" width="2048" data-og-height="1153" height="1153" data-path="images/agent-skills-simple-file.png" data-optimize="true" data-opv="3" srcset="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=280&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=c61cc33b6f5855809907f7fda94cd80e 280w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=560&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=90d2c0c1c76b36e8d485f49e0810dbfd 560w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=840&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=ad17d231ac7b0bea7e5b4d58fb4aeabb 840w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=1100&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=f5d0a7a3c668435bb0aee9a3a8f8c329 1100w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=1650&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=0e927c1af9de5799cfe557d12249f6e6 1650w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-simple-file.png?w=2500&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=46bbb1a51dd4c8202a470ac8c80a893d 2500w" /> -As your Skill grows, you can bundle additional content that Claude loads only when needed: +As your Skill grows, you can bundle additional content that agents load only when needed: <img 
src="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=a5e0aa41e3d53985a7e3e43668a33ea3" alt="Bundling additional reference files like reference.md and forms.md." data-og-width="2048" width="2048" data-og-height="1327" height="1327" data-path="images/agent-skills-bundling-content.png" data-optimize="true" data-opv="3" srcset="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=280&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=f8a0e73783e99b4a643d79eac86b70a2 280w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=560&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=dc510a2a9d3f14359416b706f067904a 560w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=840&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=82cd6286c966303f7dd914c28170e385 840w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=1100&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=56f3be36c77e4fe4b523df209a6824c6 1100w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=1650&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=d22b5161b2075656417d56f41a74f3dd 1650w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-bundling-content.png?w=2500&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=3dd4bdd6850ffcc96c6c45fcb0acd6eb 2500w" /> @@ -292,11 +292,11 @@ with pdfplumber.open("file.pdf") as pdf: **Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns ```` -Claude loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. +Agents load FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. #### Pattern 2: Domain-specific organization -For Skills with multiple domains, organize content by domain to avoid loading irrelevant context. 
When a user asks about sales metrics, Claude only needs to read sales-related schemas, not finance or marketing data. This keeps token usage low and context focused. +For Skills with multiple domains, organize content by domain to avoid loading irrelevant context. When a user asks about sales metrics, the agent only needs to read sales-related schemas, not finance or marketing data. This keeps token usage low and context focused. ``` bigquery-skill/ @@ -348,13 +348,13 @@ For simple edits, modify the XML directly. **For OOXML details**: See [OOXML.md](OOXML.md) ``` -Claude reads REDLINING.md or OOXML.md only when the user needs those features. +Agents read REDLINING.md or OOXML.md only when the user needs those features. ### Avoid deeply nested references -Claude may partially read files when they're referenced from other referenced files. When encountering nested references, Claude might use commands like `head -100` to preview content rather than reading entire files, resulting in incomplete information. +Agents may partially read files when they're referenced from other referenced files. When encountering nested references, an agent might use commands like `head -100` to preview content rather than reading entire files, resulting in incomplete information. -**Keep references one level deep from SKILL.md**. All reference files should link directly from SKILL.md to ensure Claude reads complete files when needed. +**Keep references one level deep from SKILL.md**. All reference files should link directly from SKILL.md to ensure agents read complete files when needed. **Bad example: Too deep**: @@ -382,7 +382,7 @@ Here's the actual information... ### Structure longer reference files with table of contents -For reference files longer than 100 lines, include a table of contents at the top. This ensures Claude can see the full scope of available information even when previewing with partial reads. 
+For reference files longer than 100 lines, include a table of contents at the top. This ensures agents can see the full scope of available information even when previewing with partial reads. **Example**: @@ -403,7 +403,7 @@ For reference files longer than 100 lines, include a table of contents at the to ... ``` -Claude can then read the complete file or jump to specific sections as needed. +Agents can then read the complete file or jump to specific sections as needed. For details on how this filesystem-based architecture enables progressive disclosure, see the [Runtime environment](#runtime-environment) section in the Advanced section below. @@ -411,7 +411,7 @@ For details on how this filesystem-based architecture enables progressive disclo ### Use workflows for complex tasks -Break complex operations into clear, sequential steps. For particularly complex workflows, provide a checklist that Claude can copy into its response and check off as it progresses. +Break complex operations into clear, sequential steps. For particularly complex workflows, provide a checklist that the agent can copy into its response and check off as it progresses. **Example 1: Research synthesis workflow** (for Skills without code): @@ -498,7 +498,7 @@ Run: `python scripts/verify_output.py output.pdf` If verification fails, return to Step 2. ```` -Clear steps prevent Claude from skipping critical validation. The checklist helps both Claude and you track progress through multi-step workflows. +Clear steps prevent agents from skipping critical validation. The checklist helps both you and the agent track progress through multi-step workflows. ### Implement feedback loops @@ -524,7 +524,7 @@ This pattern greatly improves output quality. 5. Finalize and save the document ``` -This shows the validation loop pattern using reference documents instead of scripts. The "validator" is STYLE\_GUIDE.md, and Claude performs the check by reading and comparing. 
+This shows the validation loop pattern using reference documents instead of scripts. The "validator" is STYLE\_GUIDE.md, and the agent performs the check by reading and comparing. **Example 2: Document editing process** (for Skills with code): @@ -593,7 +593,7 @@ Choose one term and use it throughout the Skill: * Mix "field", "box", "element", "control" * Mix "extract", "pull", "get", "retrieve" -Consistency helps Claude understand and follow instructions. +Consistency helps agents understand and follow instructions. ## Common patterns @@ -688,11 +688,11 @@ chore: update dependencies and refactor error handling Follow this style: type(scope): brief description, then detailed explanation. ```` -Examples help Claude understand the desired style and level of detail more clearly than descriptions alone. +Examples help agents understand the desired style and level of detail more clearly than descriptions alone. ### Conditional workflow pattern -Guide Claude through decision points: +Guide agents through decision points: ```markdown theme={null} ## Document modification workflow @@ -715,7 +715,7 @@ Guide Claude through decision points: ``` <Tip> - If workflows become large or complicated with many steps, consider pushing them into separate files and tell Claude to read the appropriate file based on the task at hand. + If workflows become large or complicated with many steps, consider pushing them into separate files and tell the agent to read the appropriate file based on the task at hand. </Tip> ## Evaluation and iteration @@ -726,9 +726,9 @@ Guide Claude through decision points: **Evaluation-driven development:** -1. **Identify gaps**: Run Claude on representative tasks without a Skill. Document specific failures or missing context +1. **Identify gaps**: Run your agent on representative tasks without a Skill. Document specific failures or missing context 2. **Create evaluations**: Build three scenarios that test these gaps -3. 
**Establish baseline**: Measure Claude's performance without the Skill +3. **Establish baseline**: Measure the agent's performance without the Skill 4. **Write minimal instructions**: Create just enough content to address the gaps and pass evaluations 5. **Iterate**: Execute evaluations, compare against baseline, and refine @@ -753,51 +753,51 @@ This approach ensures you're solving actual problems rather than anticipating re This example demonstrates a data-driven evaluation with a simple testing rubric. We do not currently provide a built-in way to run these evaluations. Users can create their own evaluation system. Evaluations are your source of truth for measuring Skill effectiveness. </Note> -### Develop Skills iteratively with Claude +### Develop Skills iteratively with the agent -The most effective Skill development process involves Claude itself. Work with one instance of Claude ("Claude A") to create a Skill that will be used by other instances ("Claude B"). Claude A helps you design and refine instructions, while Claude B tests them in real tasks. This works because Claude models understand both how to write effective agent instructions and what information agents need. +The most effective Skill development process involves the agent itself. Work with one instance ("Agent A") to create a Skill that will be used by other instances ("Agent B"). Agent A helps you design and refine instructions, while Agent B tests them in real tasks. This works because the underlying models understand both how to write effective agent instructions and what information agents need. **Creating a new Skill:** -1. **Complete a task without a Skill**: Work through a problem with Claude A using normal prompting. As you work, you'll naturally provide context, explain preferences, and share procedural knowledge. Notice what information you repeatedly provide. +1. **Complete a task without a Skill**: Work through a problem with Agent A using normal prompting. 
As you work, you'll naturally provide context, explain preferences, and share procedural knowledge. Notice what information you repeatedly provide. 2. **Identify the reusable pattern**: After completing the task, identify what context you provided that would be useful for similar future tasks. **Example**: If you worked through a BigQuery analysis, you might have provided table names, field definitions, filtering rules (like "always exclude test accounts"), and common query patterns. -3. **Ask Claude A to create a Skill**: "Create a Skill that captures this BigQuery analysis pattern we just used. Include the table schemas, naming conventions, and the rule about filtering test accounts." +3. **Ask Agent A to create a Skill**: "Create a Skill that captures this BigQuery analysis pattern we just used. Include the table schemas, naming conventions, and the rule about filtering test accounts." <Tip> - Claude models understand the Skill format and structure natively. You don't need special system prompts or a "writing skills" skill to get Claude to help create Skills. Simply ask Claude to create a Skill and it will generate properly structured SKILL.md content with appropriate frontmatter and body content. + Modern agents understand the Skill format and structure natively. You don't need special system prompts or a "writing skills" skill to get help creating Skills. Simply ask the agent to create a Skill and it will generate properly structured SKILL.md content with appropriate frontmatter and body content. </Tip> -4. **Review for conciseness**: Check that Claude A hasn't added unnecessary explanations. Ask: "Remove the explanation about what win rate means - Claude already knows that." +4. **Review for conciseness**: Check that Agent A hasn't added unnecessary explanations. Ask: "Remove the explanation about what win rate means - the agent already knows that." -5. **Improve information architecture**: Ask Claude A to organize the content more effectively. 
For example: "Organize this so the table schema is in a separate reference file. We might add more tables later." +5. **Improve information architecture**: Ask Agent A to organize the content more effectively. For example: "Organize this so the table schema is in a separate reference file. We might add more tables later." -6. **Test on similar tasks**: Use the Skill with Claude B (a fresh instance with the Skill loaded) on related use cases. Observe whether Claude B finds the right information, applies rules correctly, and handles the task successfully. +6. **Test on similar tasks**: Use the Skill with Agent B (a fresh instance with the Skill loaded) on related use cases. Observe whether Agent B finds the right information, applies rules correctly, and handles the task successfully. -7. **Iterate based on observation**: If Claude B struggles or misses something, return to Claude A with specifics: "When Claude used this Skill, it forgot to filter by date for Q4. Should we add a section about date filtering patterns?" +7. **Iterate based on observation**: If Agent B struggles or misses something, return to Agent A with specifics: "When the agent used this Skill, it forgot to filter by date for Q4. Should we add a section about date filtering patterns?" **Iterating on existing Skills:** The same hierarchical pattern continues when improving Skills. You alternate between: -* **Working with Claude A** (the expert who helps refine the Skill) -* **Testing with Claude B** (the agent using the Skill to perform real work) -* **Observing Claude B's behavior** and bringing insights back to Claude A +* **Working with Agent A** (the expert who helps refine the Skill) +* **Testing with Agent B** (the agent using the Skill to perform real work) +* **Observing Agent B's behavior** and bringing insights back to Agent A -1. **Use the Skill in real workflows**: Give Claude B (with the Skill loaded) actual tasks, not test scenarios +1. 
**Use the Skill in real workflows**: Give Agent B (with the Skill loaded) actual tasks, not test scenarios -2. **Observe Claude B's behavior**: Note where it struggles, succeeds, or makes unexpected choices +2. **Observe Agent B's behavior**: Note where it struggles, succeeds, or makes unexpected choices - **Example observation**: "When I asked Claude B for a regional sales report, it wrote the query but forgot to filter out test accounts, even though the Skill mentions this rule." + **Example observation**: "When I asked Agent B for a regional sales report, it wrote the query but forgot to filter out test accounts, even though the Skill mentions this rule." -3. **Return to Claude A for improvements**: Share the current SKILL.md and describe what you observed. Ask: "I noticed Claude B forgot to filter test accounts when I asked for a regional report. The Skill mentions filtering, but maybe it's not prominent enough?" +3. **Return to Agent A for improvements**: Share the current SKILL.md and describe what you observed. Ask: "I noticed Agent B forgot to filter test accounts when I asked for a regional report. The Skill mentions filtering, but maybe it's not prominent enough?" -4. **Review Claude A's suggestions**: Claude A might suggest reorganizing to make rules more prominent, using stronger language like "MUST filter" instead of "always filter", or restructuring the workflow section. +4. **Review Agent A's suggestions**: Agent A might suggest reorganizing to make rules more prominent, using stronger language like "MUST filter" instead of "always filter", or restructuring the workflow section. -5. **Apply and test changes**: Update the Skill with Claude A's refinements, then test again with Claude B on similar requests +5. **Apply and test changes**: Update the Skill with Agent A's refinements, then test again with Agent B on similar requests 6. **Repeat based on usage**: Continue this observe-refine-test cycle as you encounter new scenarios. 
Each iteration improves the Skill based on real agent behavior, not assumptions. @@ -807,18 +807,18 @@ The same hierarchical pattern continues when improving Skills. You alternate bet 2. Ask: Does the Skill activate when expected? Are instructions clear? What's missing? 3. Incorporate feedback to address blind spots in your own usage patterns -**Why this approach works**: Claude A understands agent needs, you provide domain expertise, Claude B reveals gaps through real usage, and iterative refinement improves Skills based on observed behavior rather than assumptions. +**Why this approach works**: Agent A understands agent needs, you provide domain expertise, Agent B reveals gaps through real usage, and iterative refinement improves Skills based on observed behavior rather than assumptions. -### Observe how Claude navigates Skills +### Observe how agents navigate Skills -As you iterate on Skills, pay attention to how Claude actually uses them in practice. Watch for: +As you iterate on Skills, pay attention to how agents actually use them in practice. Watch for: -* **Unexpected exploration paths**: Does Claude read files in an order you didn't anticipate? This might indicate your structure isn't as intuitive as you thought -* **Missed connections**: Does Claude fail to follow references to important files? Your links might need to be more explicit or prominent -* **Overreliance on certain sections**: If Claude repeatedly reads the same file, consider whether that content should be in the main SKILL.md instead -* **Ignored content**: If Claude never accesses a bundled file, it might be unnecessary or poorly signaled in the main instructions +* **Unexpected exploration paths**: Does the agent read files in an order you didn't anticipate? This might indicate your structure isn't as intuitive as you thought +* **Missed connections**: Does the agent fail to follow references to important files? 
Your links might need to be more explicit or prominent +* **Overreliance on certain sections**: If the agent repeatedly reads the same file, consider whether that content should be in the main SKILL.md instead +* **Ignored content**: If the agent never accesses a bundled file, it might be unnecessary or poorly signaled in the main instructions -Iterate based on these observations rather than assumptions. The 'name' and 'description' in your Skill's metadata are particularly critical. Claude uses these when deciding whether to trigger the Skill in response to the current task. Make sure they clearly describe what the Skill does and when it should be used. +Iterate based on these observations rather than assumptions. The 'name' and 'description' in your Skill's metadata are particularly critical. Agents use these when deciding whether to trigger the Skill in response to the current task. Make sure they clearly describe what the Skill does and when it should be used. ## Anti-patterns to avoid @@ -854,7 +854,7 @@ The sections below focus on Skills that include executable scripts. If your Skil ### Solve, don't punt -When writing scripts for Skills, handle error conditions rather than punting to Claude. +When writing scripts for Skills, handle error conditions rather than punting to the agent. **Good example: Handle errors explicitly**: @@ -876,15 +876,15 @@ def process_file(path): return '' ``` -**Bad example: Punt to Claude**: +**Bad example: Punt to the agent**: ```python theme={null} def process_file(path): - # Just fail and let Claude figure it out + # Just fail and let the agent figure it out return open(path).read() ``` -Configuration parameters should also be justified and documented to avoid "voodoo constants" (Ousterhout's law). If you don't know the right value, how will Claude determine it? +Configuration parameters should also be justified and documented to avoid "voodoo constants" (Ousterhout's law). 
If you don't know the right value, how will the agent determine it? **Good example: Self-documenting**: @@ -907,7 +907,7 @@ RETRIES = 5 # Why 5? ### Provide utility scripts -Even if Claude could write a script, pre-made scripts offer advantages: +Even if your agent could write a script, pre-made scripts offer advantages: **Benefits of utility scripts**: @@ -918,9 +918,9 @@ Even if Claude could write a script, pre-made scripts offer advantages: <img src="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=4bbc45f2c2e0bee9f2f0d5da669bad00" alt="Bundling executable scripts alongside instruction files" data-og-width="2048" width="2048" data-og-height="1154" height="1154" data-path="images/agent-skills-executable-scripts.png" data-optimize="true" data-opv="3" srcset="https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=280&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=9a04e6535a8467bfeea492e517de389f 280w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=560&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=e49333ad90141af17c0d7651cca7216b 560w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=840&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=954265a5df52223d6572b6214168c428 840w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=1100&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=2ff7a2d8f2a83ee8af132b29f10150fd 1100w, https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=1650&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=48ab96245e04077f4d15e9170e081cfb 1650w, 
https://mintcdn.com/anthropic-claude-docs/4Bny2bjzuGBK7o00/images/agent-skills-executable-scripts.png?w=2500&fit=max&auto=format&n=4Bny2bjzuGBK7o00&q=85&s=0301a6c8b3ee879497cc5b5483177c90 2500w" /> -The diagram above shows how executable scripts work alongside instruction files. The instruction file (forms.md) references the script, and Claude can execute it without loading its contents into context. +The diagram above shows how executable scripts work alongside instruction files. The instruction file (forms.md) references the script, and the agent can execute it without loading its contents into context. -**Important distinction**: Make clear in your instructions whether Claude should: +**Important distinction**: Make clear in your instructions whether the agent should: * **Execute the script** (most common): "Run `analyze_form.py` to extract fields" * **Read it as reference** (for complex logic): "See `analyze_form.py` for the field extraction algorithm" @@ -962,7 +962,7 @@ python scripts/fill_form.py input.pdf fields.json output.pdf ### Use visual analysis -When inputs can be rendered as images, have Claude analyze them: +When inputs can be rendered as images, have the agent analyze them: ````markdown theme={null} ## Form layout analysis @@ -973,20 +973,20 @@ When inputs can be rendered as images, have Claude analyze them: ``` 2. Analyze each page image to identify form fields -3. Claude can see field locations and types visually +3. The agent can see field locations and types visually ```` <Note> In this example, you'd need to write the `pdf_to_images.py` script. </Note> -Claude's vision capabilities help understand layouts and structures. +Agent vision capabilities help understand layouts and structures. ### Create verifiable intermediate outputs -When Claude performs complex, open-ended tasks, it can make mistakes. 
The "plan-validate-execute" pattern catches errors early by having Claude first create a plan in a structured format, then validate that plan with a script before executing it. +When agents perform complex, open-ended tasks, they can make mistakes. The "plan-validate-execute" pattern catches errors early by having the agent first create a plan in a structured format, then validate that plan with a script before executing it. -**Example**: Imagine asking Claude to update 50 form fields in a PDF based on a spreadsheet. Without validation, Claude might reference non-existent fields, create conflicting values, miss required fields, or apply updates incorrectly. +**Example**: Imagine asking the agent to update 50 form fields in a PDF based on a spreadsheet. Without validation, it might reference non-existent fields, create conflicting values, miss required fields, or apply updates incorrectly. **Solution**: Use the workflow pattern shown above (PDF form filling), but add an intermediate `changes.json` file that gets validated before applying changes. The workflow becomes: analyze → **create plan file** → **validate plan** → execute → verify. @@ -994,12 +994,12 @@ When Claude performs complex, open-ended tasks, it can make mistakes. The "plan- * **Catches errors early**: Validation finds problems before changes are applied * **Machine-verifiable**: Scripts provide objective verification -* **Reversible planning**: Claude can iterate on the plan without touching originals +* **Reversible planning**: The agent can iterate on the plan without touching originals * **Clear debugging**: Error messages point to specific problems **When to use**: Batch operations, destructive changes, complex validation rules, high-stakes operations. -**Implementation tip**: Make validation scripts verbose with specific error messages like "Field 'signature\_date' not found. Available fields: customer\_name, order\_total, signature\_date\_signed" to help Claude fix issues. 
+**Implementation tip**: Make validation scripts verbose with specific error messages like "Field 'signature\_date' not found. Available fields: customer\_name, order\_total, signature\_date\_signed" to help the agent fix issues. ### Package dependencies @@ -1008,32 +1008,32 @@ Skills run in the code execution environment with platform-specific limitations: * **claude.ai**: Can install packages from npm and PyPI and pull from GitHub repositories * **Anthropic API**: Has no network access and no runtime package installation -List required packages in your SKILL.md and verify they're available in the [code execution tool documentation](/en/docs/agents-and-tools/tool-use/code-execution-tool). +List required packages in your SKILL.md and verify they're available in the [code execution tool documentation](https://platform.claude.com/docs/en/agents-and-tools/tool-use/code-execution-tool). ### Runtime environment -Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. For the conceptual explanation of this architecture, see [The Skills architecture](/en/docs/agents-and-tools/agent-skills/overview#the-skills-architecture) in the overview. +Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. For the conceptual explanation of this architecture, see [The Skills architecture](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#the-skills-architecture) in the overview. **How this affects your authoring:** -**How Claude accesses Skills:** +**How agents access Skills:** 1. **Metadata pre-loaded**: At startup, the name and description from all Skills' YAML frontmatter are loaded into the system prompt -2. **Files read on-demand**: Claude uses bash Read tools to access SKILL.md and other files from the filesystem when needed +2. 
**Files read on-demand**: Agents use their file-reading tools to access SKILL.md and other files from the filesystem when needed 3. **Scripts executed efficiently**: Utility scripts can be executed via bash without loading their full contents into context. Only the script's output consumes tokens 4. **No context penalty for large files**: Reference files, data, or documentation don't consume context tokens until actually read -* **File paths matter**: Claude navigates your skill directory like a filesystem. Use forward slashes (`reference/guide.md`), not backslashes +* **File paths matter**: Agents navigate your skill directory like a filesystem. Use forward slashes (`reference/guide.md`), not backslashes * **Name files descriptively**: Use names that indicate content: `form_validation_rules.md`, not `doc2.md` * **Organize for discovery**: Structure directories by domain or feature * Good: `reference/finance.md`, `reference/sales.md` * Bad: `docs/file1.md`, `docs/file2.md` * **Bundle comprehensive resources**: Include complete API docs, extensive examples, large datasets; no context penalty until accessed -* **Prefer scripts for deterministic operations**: Write `validate_form.py` rather than asking Claude to generate validation code +* **Prefer scripts for deterministic operations**: Write `validate_form.py` rather than asking the agent to generate validation code * **Make execution intent clear**: * "Run `analyze_form.py` to extract fields" (execute) * "See `analyze_form.py` for the extraction algorithm" (read as reference) -* **Test file access patterns**: Verify Claude can navigate your directory structure by testing with real requests +* **Test file access patterns**: Verify the agent can navigate your directory structure by testing with real requests **Example:** @@ -1046,9 +1046,9 @@ bigquery-skill/ └── product.md (usage analytics) ``` -When the user asks about revenue, Claude reads SKILL.md, sees the reference to `reference/finance.md`, and invokes bash to 
read just that file. The sales.md and product.md files remain on the filesystem, consuming zero context tokens until needed. This filesystem-based model is what enables progressive disclosure. Claude can navigate and selectively load exactly what each task requires. +When the user asks about revenue, the agent reads SKILL.md, sees the reference to `reference/finance.md`, and invokes bash to read just that file. The sales.md and product.md files remain on the filesystem, consuming zero context tokens until needed. This filesystem-based model is what enables progressive disclosure. Agents can navigate and selectively load exactly what each task requires. -For complete details on the technical architecture, see [How Skills work](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work) in the Skills overview. +For complete details on the technical architecture, see [How Skills work](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work) in the Skills overview. ### MCP tool references @@ -1068,7 +1068,7 @@ Where: * `BigQuery` and `GitHub` are MCP server names * `bigquery_schema` and `create_issue` are the tool names within those servers -Without the server prefix, Claude may fail to locate the tool, especially when multiple MCP servers are available. +Without the server prefix, agents may fail to locate the tool, especially when multiple MCP servers are available. ### Avoid assuming tools are installed @@ -1092,11 +1092,11 @@ reader = PdfReader("file.pdf") ### YAML frontmatter requirements -The SKILL.md frontmatter requires `name` (64 characters max) and `description` (1024 characters max) fields. See the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#skill-structure) for complete structure details. +The SKILL.md frontmatter requires `name` (64 characters max) and `description` (1024 characters max) fields. 
See the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#skill-structure) for complete structure details. ### Token budgets -Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. For architectural details, see the [Skills overview](/en/docs/agents-and-tools/agent-skills/overview#how-skills-work). +Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. For architectural details, see the [Skills overview](https://platform.claude.com/docs/en/agents-and-tools/agent-skills/overview#how-skills-work). ## Checklist for effective Skills @@ -1117,7 +1117,7 @@ Before sharing a Skill, verify: ### Code and scripts -* [ ] Scripts solve problems rather than punt to Claude +* [ ] Scripts solve problems rather than punt to the agent * [ ] Error handling is explicit and helpful * [ ] No "voodoo constants" (all values justified) * [ ] Required packages listed in instructions and verified as available @@ -1136,15 +1136,15 @@ Before sharing a Skill, verify: ## Next steps <CardGroup cols={2}> - <Card title="Get started with Agent Skills" icon="rocket" href="/en/docs/agents-and-tools/agent-skills/quickstart"> + <Card title="Get started with Agent Skills" icon="rocket" href="https://platform.claude.com/docs/en/agents-and-tools/agent-skills/quickstart"> Create your first Skill </Card> - <Card title="Use Skills in Claude Code" icon="terminal" href="/en/docs/claude-code/skills"> + <Card title="Use Skills in Claude Code" icon="terminal" href="https://code.claude.com/docs/en/skills"> Create and manage Skills in Claude Code </Card> - <Card title="Use Skills with the API" icon="code" href="/en/api/skills-guide"> + <Card title="Use Skills with the API" icon="code" 
href="https://platform.claude.com/docs/en/build-with-claude/skills-guide"> Upload and use Skills programmatically </Card> </CardGroup> diff --git a/src-tauri/experts/skills/writing-skills/persuasion-principles.md b/src-tauri/experts/skills/writing-skills/persuasion-principles.md index 9818a5f9..9756416a 100644 --- a/src-tauri/experts/skills/writing-skills/persuasion-principles.md +++ b/src-tauri/experts/skills/writing-skills/persuasion-principles.md @@ -33,7 +33,7 @@ LLMs respond to the same persuasion principles as humans. Understanding this psy **How it works in skills:** - Require announcements: "Announce skill usage" - Force explicit choices: "Choose A, B, or C" -- Use tracking: TodoWrite for checklists +- Use tracking: todos for checklists **When to use:** - Ensuring skills are actually followed @@ -80,8 +80,8 @@ LLMs respond to the same persuasion principles as humans. Understanding this psy **Example:** ```markdown -✅ Checklists without TodoWrite tracking = steps get skipped. Every time. -❌ Some people find TodoWrite helpful for checklists. +✅ Checklists without todo tracking = steps get skipped. Every time. +❌ Some people find a todo list helpful for checklists. ``` ### 5. Unity From 1be655e944e737242dbc1943d5f00db46b98365d Mon Sep 17 00:00:00 2001 From: xintaofei <itpkcn@gmail.com> Date: Wed, 17 Jun 2026 19:02:33 +0800 Subject: [PATCH 3/6] fix(experts): preserve executable bit for bundled shebang scripts include_dir! embeds file contents but not Unix permission bits, so bundled scripts (subagent-driven-development/scripts/* and the brainstorming companion's *.sh) extracted to ~/.codeg/skills as non-executable and failed when a skill invoked them by path. On extraction, restore the execute bit for any file whose contents start with a #! shebang, guarded by #[cfg(unix)]. The content hash is path/content-only, so this does not trigger re-extract loops. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- src-tauri/src/commands/experts.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src-tauri/src/commands/experts.rs b/src-tauri/src/commands/experts.rs index 97e38ac3..712ecc35 100644 --- a/src-tauri/src/commands/experts.rs +++ b/src-tauri/src/commands/experts.rs @@ -665,6 +665,20 @@ fn extract_bundle_dir( fs::create_dir_all(parent)?; } fs::write(&out_path, f.contents())?; + // `include_dir!` does not carry Unix permission bits, so bundled + // scripts (e.g. subagent-driven-development/scripts/* and the + // brainstorming companion's *.sh) would extract as non-executable + // and fail when a skill invokes them by path. Restore the execute + // bit for any file that declares a shebang. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if f.contents().starts_with(b"#!") { + let mut perms = fs::metadata(&out_path)?.permissions(); + perms.set_mode(perms.mode() | 0o111); + fs::set_permissions(&out_path, perms)?; + } + } } DirEntry::Dir(d) => { extract_bundle_dir(d, bundle_prefix, target)?; From fe11d3158adf0f251a90b703805ee042f58a3cee Mon Sep 17 00:00:00 2001 From: xintaofei <itpkcn@gmail.com> Date: Wed, 17 Jun 2026 20:51:45 +0800 Subject: [PATCH 4/6] chore(acp): bump claude code to 0.47.0 --- src-tauri/src/acp/registry.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src-tauri/src/acp/registry.rs b/src-tauri/src/acp/registry.rs index 17216250..a0fafe30 100644 --- a/src-tauri/src/acp/registry.rs +++ b/src-tauri/src/acp/registry.rs @@ -143,8 +143,8 @@ pub fn get_agent_meta(agent_type: AgentType) -> AcpAgentMeta { name: "Claude Code", description: "ACP wrapper for Anthropic's Claude", distribution: AgentDistribution::Npx { - version: "0.46.0", - package: "@agentclientprotocol/claude-agent-acp@0.46.0", + version: "0.47.0", + package: "@agentclientprotocol/claude-agent-acp@0.47.0", cmd: "claude-agent-acp", args: &[], env: &[], @@ -376,8 +376,8 
@@ mod tests { fn registry_pins_current_acp_agent_versions() { assert_npx_version( AgentType::ClaudeCode, - "0.46.0", - "@agentclientprotocol/claude-agent-acp@0.46.0", + "0.47.0", + "@agentclientprotocol/claude-agent-acp@0.47.0", None, ); assert_npx_version( From 0fd27f15b4e132583db52982de8c337355436c1f Mon Sep 17 00:00:00 2001 From: xintaofei <itpkcn@gmail.com> Date: Wed, 17 Jun 2026 20:53:26 +0800 Subject: [PATCH 5/6] fix(install): support root and sudo-less installs Root users on minimal systems without sudo (containers, slim images) failed at the web-assets stage and were left with a half-installed tree that re-runs would not repair. - Add a privilege model (resolve_priv/priv_run): root writes directly, non-root uses sudo only when the target is not writable, and the installer fails fast with guidance when elevation is needed but sudo is missing. - Probe the nearest existing ancestor for writability so a not-yet-created web directory no longer forces the sudo path. - Make the version short-circuit require WEB_DIR/index.html so an install missing web assets self-heals on re-run instead of exiting early. 
--- install.sh | 118 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 24 deletions(-) diff --git a/install.sh b/install.sh index 09fd3783..a225fcc4 100755 --- a/install.sh +++ b/install.sh @@ -10,6 +10,7 @@ set -euo pipefail REPO="xintaofei/codeg" INSTALL_DIR="${CODEG_INSTALL_DIR:-/usr/local/bin}" +WEB_DIR="${CODEG_WEB_DIR:-/usr/local/share/codeg/web}" VERSION="" # Stale codeg-server / codeg-mcp binaries elsewhere in PATH are removed by # default so the user's `codeg-server` command always runs the freshly @@ -117,6 +118,66 @@ read_bin_version() { rm -f "$tmp" } +# ── Privilege model ── +# +# Root can write anywhere and must NEVER call `sudo`: minimal root environments +# (containers, slim images) frequently don't ship sudo, and a blind `sudo mkdir` +# there aborts the whole script under `set -e` AFTER the binaries already landed +# — leaving a half-installed tree the version short-circuit then refuses to +# repair. A non-root user needs sudo only when the destination's nearest +# existing ancestor isn't writable. + +PRIV="" +IS_ROOT=0 +# Conservative default: if `id -u` somehow fails, assume NON-root (echo 1) so we +# fall back to writability-probing + sudo rather than wrongly skipping elevation. +# This is still correct for a real root whose `id` broke: `[ -w ]` on existing +# system dirs is true for root, so resolve_priv runs directly anyway. +if [ "$(id -u 2>/dev/null || echo 1)" = "0" ]; then + IS_ROOT=1 +fi + +HAVE_SUDO=0 +if command -v sudo >/dev/null 2>&1; then + HAVE_SUDO=1 +fi + +# Walk up from $1 to the first ancestor that already exists, so writability can +# be tested for a not-yet-created path (e.g. /usr/local/share/codeg/web, whose +# parent /usr/local/share/codeg also doesn't exist on a fresh install). +nearest_existing_ancestor() { + local p="$1" + while [ -n "$p" ] && [ "$p" != "/" ] && [ ! -e "$p" ]; do + p="$(dirname "$p")" + done + echo "$p" +} + +# Decide how to create/write into directory $1. 
Sets global PRIV to "" (run +# directly) or "sudo". Returns non-zero — without aborting under `set -e`, since +# callers invoke it via `if` — when elevation is required but sudo is absent. +resolve_priv() { + PRIV="" + [ "$IS_ROOT" -eq 1 ] && return 0 + local anchor + anchor="$(nearest_existing_ancestor "$1")" + [ -w "$anchor" ] && return 0 + if [ "$HAVE_SUDO" -eq 1 ]; then + PRIV="sudo" + return 0 + fi + return 1 +} + +# Run "$@", elevating with sudo only when the last resolve_priv call decided so. +priv_run() { + if [ -n "$PRIV" ]; then + sudo "$@" + else + "$@" + fi +} + # ── Scan PATH for codeg-server binaries that shadow the target install ── # # A binary "shadows" the install only if it appears in PATH BEFORE the @@ -188,16 +249,21 @@ fi TARGET_VER="${VERSION#v}" # Only short-circuit when the active binary is up to date AND the destination -# itself has it AND no other PATH entries shadow it. Otherwise we still need to -# install / clean up so the user's `codeg-server` command runs the new version. +# has it AND no other PATH entries shadow it AND the web assets are present. +# The web-asset check makes the installer self-healing: a prior run that placed +# the binary but failed before copying web/ (the classic root-without-sudo +# case) is repaired on re-run instead of exiting "nothing to do" forever. if [ -n "$CURRENT_VERSION" ] && [ "$CURRENT_VERSION" = "$TARGET_VER" ] \ && [ "${#PATH_CONFLICTS[@]}" -eq 0 ] \ - && [ -x "$DEST_BIN" ]; then - echo "codeg-server is already at version ${TARGET_VER}, nothing to do." + && [ -x "$DEST_BIN" ] \ + && [ -f "${WEB_DIR}/index.html" ]; then + echo "codeg-server is already at version ${TARGET_VER} with web assets in place, nothing to do." exit 0 fi -if [ -n "$CURRENT_VERSION" ]; then +if [ -n "$CURRENT_VERSION" ] && [ "$CURRENT_VERSION" = "$TARGET_VER" ]; then + echo "codeg-server is already at ${TARGET_VER}; reinstalling to repair the existing install..." 
+elif [ -n "$CURRENT_VERSION" ]; then echo "Upgrading codeg-server: ${CURRENT_VERSION} -> ${TARGET_VER}..." else echo "Installing codeg-server ${VERSION} (${PLATFORM}/${ARCH_SUFFIX})..." @@ -303,23 +369,28 @@ for _name in "${MANAGED_BINS[@]}"; do fi done -mkdir -p "$INSTALL_DIR" +# Resolve how to write into INSTALL_DIR, then create it and drop the binaries. +# Root writes directly; a non-root user uses sudo only when the prefix isn't +# already writable. Bail out clearly if elevation is needed but sudo is absent, +# instead of crashing mid-install under `set -e`. +if ! resolve_priv "$INSTALL_DIR"; then + echo "Error: need elevated privileges to install to ${INSTALL_DIR}, but 'sudo' is not installed." + echo " Re-run as root, install sudo, or set CODEG_INSTALL_DIR/CODEG_WEB_DIR to writable" + echo " paths (e.g. \$HOME/.local/bin and \$HOME/.local/share/codeg/web)." + exit 1 +fi +if [ -n "$PRIV" ]; then + echo "Need sudo to install to ${INSTALL_DIR}" +fi + +priv_run mkdir -p "$INSTALL_DIR" _install_one() { local name="$1" local src="${TMP_DIR}/${ARTIFACT}/${name}" local dst="${INSTALL_DIR}/${name}" - if [ -w "$INSTALL_DIR" ]; then - cp "$src" "$dst" - chmod +x "$dst" - else - sudo cp "$src" "$dst" - sudo chmod +x "$dst" - fi + priv_run cp "$src" "$dst" + priv_run chmod +x "$dst" } - -if [ ! -w "$INSTALL_DIR" ]; then - echo "Need sudo to install to ${INSTALL_DIR}" -fi for _name in "${MANAGED_BINS[@]}"; do _install_one "$_name" done @@ -332,17 +403,16 @@ DEST_BIN_REAL="$(canon_path "$DEST_BIN")" # ── Install web assets ── WEB_SRC="${TMP_DIR}/${ARTIFACT}/web" -WEB_DIR="${CODEG_WEB_DIR:-/usr/local/share/codeg/web}" if [ -d "$WEB_SRC" ]; then echo "Installing web assets to ${WEB_DIR}..." - if [ -w "$(dirname "$WEB_DIR")" ] 2>/dev/null; then - mkdir -p "$WEB_DIR" - cp -r "$WEB_SRC"/* "$WEB_DIR"/ - else - sudo mkdir -p "$WEB_DIR" - sudo cp -r "$WEB_SRC"/* "$WEB_DIR"/ + if ! 
resolve_priv "$WEB_DIR"; then + echo "Error: need elevated privileges to write ${WEB_DIR}, but 'sudo' is not installed." + echo " Re-run as root, install sudo, or set CODEG_WEB_DIR to a writable path." + exit 1 fi + priv_run mkdir -p "$WEB_DIR" + priv_run cp -r "$WEB_SRC"/* "$WEB_DIR"/ fi # ── Remove shadowing binaries from earlier PATH entries ── From 0bba379b68c45c5e578c52cee2c313daa04bface Mon Sep 17 00:00:00 2001 From: xintaofei <xintaofei@users.noreply.github.com> Date: Wed, 17 Jun 2026 21:15:28 +0800 Subject: [PATCH 6/6] # Release version 0.15.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fix(chat): The session's model selector now shows each model's full name in a tooltip on hover — thanks to @H1d3rOne (#288). - fix(install): The install script now works for root users and on systems without sudo (containers, slim images), and a half-finished install self-heals on re-run instead of staying broken. - fix(experts): Bundled expert-skill scripts now keep their executable bit when extracted, so skills that invoke them by path no longer fail. - chore(acp): Bundled Claude Code updated to 0.47.0. - chore(experts): Bundled expert skills (superpowers) updated to v6.0.2. 
----------------------------- # 发布版本 0.15.13 - 修复(聊天):会话模型选择器现在悬停时会以提示框显示每个模型的完整名称——感谢 @H1d3rOne (#288)。 - 修复(安装):安装脚本现在支持 root 用户以及没有 sudo 的系统(容器、精简镜像),未完成的安装在重新运行时会自动修复,而非保持损坏状态。 - 修复(专家技能):内置专家技能脚本在解压时保留可执行位,通过路径调用这些脚本的技能不再失败。 - 更新(ACP):内置 Claude Code 升级至 0.47.0。 - 更新(专家技能):内置专家技能(superpowers)升级至 v6.0.2。 --- .github/workflows/release.yml | 20 ++++++++++++++++++++ package.json | 2 +- src-tauri/Cargo.lock | 2 +- src-tauri/Cargo.toml | 2 +- src-tauri/tauri.conf.json | 2 +- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a72bfd0..3cab3289 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -248,6 +248,21 @@ jobs: sudo dpkg --add-architecture arm64 sudo apt-get update + # libgraphite2-3 / libharfbuzz0b / libfreetype6 / libssl3 are + # Multi-Arch: same — their :arm64 and :amd64 copies must be the + # EXACT same version. The runner ships stale :amd64 copies, and a + # jammy security bump (e.g. graphite2 1.3.14-1ubuntu0.1) made the + # :arm64 webkit dev chain demand a newer version than the preinstalled + # :amd64. apt then refuses the :arm64 copy ("not going to be + # installed" / held broken packages). Upgrading the preinstalled + # :amd64 copies first lets both arches land on the same version. + # --only-upgrade never installs new packages, so any lib that is + # absent is simply skipped. + sudo apt-get install -y --only-upgrade \ + libgraphite2-3 \ + libharfbuzz0b \ + libfreetype6 \ + libssl3 sudo apt-get install -y \ gcc-aarch64-linux-gnu \ g++-aarch64-linux-gnu \ @@ -455,6 +470,11 @@ jobs: sudo dpkg --add-architecture arm64 sudo apt-get update + # libssl3 is Multi-Arch: same; align the preinstalled :amd64 copy + # with the version libssl-dev:arm64 pins so the cross install isn't + # blocked by a held-back amd64 copy (same failure mode as the desktop + # arm64 build — see the build-tauri step for the full rationale). 
+ sudo apt-get install -y --only-upgrade libssl3 sudo apt-get install -y \ gcc-aarch64-linux-gnu \ g++-aarch64-linux-gnu \ diff --git a/package.json b/package.json index 8f69ccd2..9639907f 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "codeg", "private": true, - "version": "0.15.12", + "version": "0.15.13", "scripts": { "dev": "next dev --turbopack", "build": "next build", diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index f5402649..e36d4837 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -982,7 +982,7 @@ checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "codeg" -version = "0.15.12" +version = "0.15.13" dependencies = [ "aes-gcm", "agent-client-protocol-schema", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 6afa125d..4ab4da99 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codeg" -version = "0.15.12" +version = "0.15.13" description = "Agent Code Generation App" authors = ["feitao"] edition = "2021" diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 4af7e78c..6e39238f 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "codeg", - "version": "0.15.12", + "version": "0.15.13", "identifier": "app.codeg", "build": { "beforeDevCommand": "pnpm tauri:before-dev",