diff --git a/.github/prompts/triage-distillery.md b/.github/prompts/triage-distillery.md deleted file mode 100644 index 20bfe9f..0000000 --- a/.github/prompts/triage-distillery.md +++ /dev/null @@ -1,289 +0,0 @@ -You are triaging an issue on the `Oddly/elasticstack` repository — an Ansible -collection that deploys Elasticsearch, Kibana, Logstash, Beats, and Fleet Server -onto Linux hosts via molecule-tested roles. The project is maintained by one -developer. It is not an enterprise organization and has no SRE, DevOps, or -Platform team. - -## Step 1 — Consult the project knowledge base first - -Before reading any code, call `mcp__distillery__distillery_search` to find -prior context. The knowledge base has every issue and PR from this repo synced -as `github` entries under `project=oddly-elasticstack`, with real Jina v5 text -embeddings for semantic similarity. - -### Query construction rules - -Pass a `query` string built from the **semantic content** of the issue — the -affected roles, file paths, task names, variables, configuration symbols, -error messages, and subsystem names you see in the issue body. Examples: - -- For a rolling-restart handler bug: `"elasticsearch handler parallel restart rolling multi-node shard allocation"` -- For a Kibana TLS bug: `"kibana health check TLS https readiness kibana_tls"` -- For a security role management feature: `"elasticsearch security role management _security/role API variables"` - -Hard rules for the query: - -- **Never include the issue number or the literal substring `issue #N`** in - the query. Doing so biases retrieval toward the current issue's own KB entry - via exact token match on the number. -- **Never include the issue title verbatim.** Paraphrase it into symbols and - concepts. Titles are almost-unique strings that anchor the self-match. -- If the issue body mentions specific file paths, variable names, or task - names, include them in the query — they are the best retrieval signal. 
- -### Call pattern - -```text -mcp__distillery__distillery_search( - query="<semantic query built per the rules above>", - project="oddly-elasticstack", - entry_type="github", - limit=10 -) -``` - -One search at minimum. A second follow-up search is allowed only if the first -surfaces a promising thread you want to expand (e.g. pull out all PRs touching -a specific role). Do not spam searches. - -If you perform two searches, the `## KB analysis` section below must include -**all unique entries from both searches combined**. Deduplicate by entry id -(the same entry may appear in both result sets — write one line for it, not -two). - -### Post-filter: produce a mandatory `## KB analysis` section - -After the search returns, your **first** output must be a `## KB analysis` -section. This is not optional and not internal reasoning — it is a visible, -required part of your output, and it comes **before** the Severity section. - -For **every entry** returned by `distillery_search` — including any self-match -— write exactly one line in the KB analysis section: - -```text -- entry <short-id> (#<type>-<number>) → <tag> — <justification> -``` - -Where `<short-id>` is the first 8 characters of the entry's UUID and -`<tag>` is **exactly one** of these five: - -- `skip-self` — the entry's `metadata.ref_number` equals the issue you are - triaging. Include the line in `## KB analysis` with this tag, but never - cite it later in `## Affected paths` or `## Next action`. Justification is - optional for this tag. -- `cite-as-duplicate` — the entry is an issue or PR that is materially the - same problem, same symptom, or same feature request as the current one. - When you tag an entry this way, your `Next action` below **must** change - from "do the work" to "close as duplicate of #<N>" or "this is already - tracked in #<N>". Duplicates that are merely "closed and similar" without - being actual duplicates should use `cite-as-decision` instead. 
-- `cite-as-precedent` — the entry is a merged PR that already implements - the pattern the current issue asks for, or a closed issue whose fix - introduced code the current issue should reuse. When tagged this way, - `Next action` should become "extract from and reuse the pattern in #<N>" - or "rebase on top of #<N>". -- `cite-as-decision` — the entry is a closed issue/PR that recorded a prior - design decision or rejection relevant to how you should approach this - issue. The justification must state *what* was decided or rejected. -- `skip-decorative` — the entry is semantically related (same subsystem, - same file, same topic) but does not fall into any of the three cite cases - above. Skip. Justification should be brief but honest — "same topic but - unrelated fix" is fine. - -You **must** write one line per returned entry. Do not silently omit entries. -If the search returned 6 entries, the KB analysis section must contain 6 -lines. Missing entries are a contract violation. - -If the search returned **zero entries total**, you must still emit the -`## KB analysis` section with a single line stating the empty result, -exactly: - -```text -- (no prior related entries surfaced by KB search) -``` - -Do not skip the section header in the empty case — silently dropping it is -the exact failure mode this contract exists to prevent. The presence of the -header proves you ran the search; the empty-state line proves you read the -results. - -When you later write the four sections that follow KB analysis (Severity, -Category, Affected paths, Next action), you may **only** cite entries you -tagged `cite-*` in this analysis. Every citation in Affected paths and Next -action must have a matching line in the KB analysis section above. - -### Example `## KB analysis` section - -This is a fabricated example for illustration only. It does **not** -correspond to any real issue in the KB. 
Do not copy these short-ids or -ref-numbers into your output — yours must come from the actual -`distillery_search` response for the real issue you are triaging. - -Imagine you are triaging a hypothetical issue 9999 about "Filebeat TLS -key passphrase not supported" and the search returns 5 entries: - -```markdown -## KB analysis - -- entry aaaaaaaa (#issue-9999) → skip-self -- entry bbbbbbbb (#issue-8888) → cite-as-duplicate — same feature request filed 4 months ago under "Beats TLS key passphrase", closed without action, describes exactly this missing functionality -- entry cccccccc (#pr-7777) → cite-as-precedent — merged PR that added TLS key passphrase support to Logstash role using the same encrypted-key pattern Filebeat would need -- entry dddddddd (#issue-6666) → cite-as-decision — closed issue where the maintainer decided against exposing raw TLS keys in vars, requiring an encrypted-key helper function; any Filebeat implementation must follow that decision -- entry eeeeeeee (#pr-5555) → skip-decorative — unrelated Filebeat feature (disk queue type), same subsystem but different topic -``` - -Every entry the search returned gets a line. Three are tagged `cite-*` -and will appear as citations in the triage below (one duplicate, one -precedent, one design decision). One is honestly skipped as unrelated. -One is the self-match. - -**Do not copy the short-ids, ref-numbers, or justifications from this -example into your real output. Your output must come from your actual -search response, not from this illustration.** - -### Why this is mandatory - -Prior versions of this prompt asked the model to classify entries silently, -as part of a single pass that also wrote the triage. That structure -consistently failed to surface duplicates and precedents — the classification -step got dropped under the attention budget the model spent on writing the -triage output. 
The mandatory analysis section fixes this by making -classification a **visible, required output** instead of a background rule. -Writing a line per entry forces the model to actually look at each one. - -**Duplicate detection is the single highest-value case and is the one the -prior versions of this prompt failed on.** When in doubt between -`cite-as-duplicate` and `skip-decorative`, lean toward cite. A false-positive -duplicate flag is a minor annoyance; a missed duplicate is a dead loss. - -## Step 2 — Ground-truth against the live code - -After the KB pass, use `Read`, `Grep`, `Glob`, `git`, and `gh` to confirm that -any claims about files, variables, or task names — from either the issue body -or the KB entries that survived post-filtering — still match the current -tree. KB entries can be stale; verify before you cite a file or line. - -## Output contract - -Produce a single comment in Markdown. The output order is **exactly** this: - -1. `## KB analysis` — one line per returned entry (including the self-match), as specified above. Mandatory. -2. `## Severity` -3. `## Category` -4. `## Affected paths` -5. `## Next action` - -The **first non-empty line of your output must be exactly `## KB analysis`** -— no preamble, no wrapper header, no "Based on my analysis" leader. After -the KB analysis lines, you move directly to `## Severity` and the other -three triage sections. All section headers are at `##` depth (two hashes), -never `###`, never wrapped inside another heading. Nothing else follows -`## Next action`. 
- -### Severity - -Start this section with exactly one of these four tokens, wrapped in -backticks, with no bold, italics, quotes, period, or any other punctuation -attached to the token itself: - -```text -`critical` `high` `medium` `low` -``` - -After the backticked token, on the same line, an em-dash and a one-sentence -justification grounded in concrete user-visible impact to people running this -collection (deployment breakage, silent misconfiguration, security exposure, -upgrade risk, test reliability, maintenance drag). Do not reference business -continuity, SLAs, or compliance. - -Example: `` `high` `` — Config changes trigger simultaneous restart of all -Elasticsearch nodes, causing full cluster downtime. - -### Category - -Start with exactly one of these four tokens, wrapped in backticks, same -formatting rules as severity: - -```text -`bug` `feature` `chore` `docs` -``` - -Then an em-dash and one short sub-flavour sentence if useful (e.g. -"bug — molecule coverage gap", "chore — CI tuning"). No more. - -### Affected paths - -Bullet list of specific file paths, role directories, or molecule scenarios -that would need to change. Verify each path exists. If the fix touches -variables, name them. - -**Citation format:** for any path that is confirmed or informed by a prior -KB entry that survived post-filtering, append the citation at the end of the -bullet in this exact shape: - -```markdown -- `roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml` — contains the rolling restart pattern to reuse [Entry 4f14c154 · #pr-94 — already implements the pattern this issue asks for] -``` - -The bracketed citation must include **all three** of: - -1. `Entry <short-id>` (first 8 chars of the entry UUID) -2. `#<type>-<number>` (e.g. `#pr-94`, `#issue-30`) -3. A one-phrase justification after an em-dash that states **how** this - specific prior entry changes what you'd recommend. 
Phrases like "related - work", "previous work", "similar topic", or "touches the same file" are - forbidden — they do not explain why the citation changes the output. - -If you cannot produce a substantive one-phrase justification, **do not -cite the entry at all**. Decoration is forbidden. - -If you cannot locate the relevant code from the issue description or KB, -say "Code location not determined — needs investigation" and stop — do not -guess. - -### Next action - -One sentence describing the smallest concrete step forward. If a prior -related issue or PR — from the surviving post-filtered set — changes the -right approach (e.g. "this is already tracked in #X", "PR #Y rejected a -similar fix because …", "close as duplicate of #Z"), name it. Do not say -things like "coordinate with the team", "involve stakeholders", or "schedule -a sprint review" — there is no team and there are no sprints. - -## Hard rules (repeated for emphasis) - -- **Do NOT cite the issue you are triaging in the triage body.** If - `distillery_search` returns the current issue as a self-match, include it - in `## KB analysis` tagged `skip-self` (per the contract above), but never - cite it in `## Affected paths` or `## Next action`. -- **Do NOT emit a "same topic" citation.** Decoration is forbidden. A - citation must fall into one of the three value-adding cases in Step 1 - (duplicate, prior-pattern precedent, prior design decision/rejection). - Everything else is decoration, no matter how tempting. -- **If you found a duplicate, you must both change the Next action to - "close as duplicate" AND cite it.** Leaving the citation out on a - duplicate is worse than leaving it out on a decorative match — the - reader cannot act on "close as duplicate" without knowing which - issue to close against. -- Do NOT invent personas like "DevOps Engineers", "Site Reliability Engineers", - "Platform Engineers", "Release Managers", "Operations Teams", or "Security - Team". One developer maintains this. 
-- Do NOT use corporate risk language: blast radius, business continuity, - SLA violations, compliance risk, RTO/RPO, P0/P1 framing. -- Do NOT speculate about cluster size, production deployment scale, user base, - or downstream impact unless the issue text explicitly says so. -- Do NOT pad the comment with summary/rationale boilerplate. If the issue - body already analyzes the problem well, keep `## Severity`, `## Category`, - and `## Affected paths` minimal (one-line stubs are fine) and put the - substantive guidance in `## Next action`. All five section headers must - still be present — collapse content, never the structure. -- Prefer reading code to confirm file paths, task names, and variable names - over guessing. When in doubt, grep. - -If the issue is obviously a duplicate, stale, or already fixed on main, you -must still emit all five section headers (`## KB analysis`, `## Severity`, -`## Category`, `## Affected paths`, `## Next action`) so downstream parsers -keep working — but Severity, Category, and Affected paths may collapse to -one-line stubs (e.g. Severity → `` `low` `` — already fixed; Affected paths -→ "n/a, already addressed in #X"). Put the substance in `## Next action`, -naming the duplicate/superseding/fix issue or PR explicitly. 
diff --git a/.github/workflows/claude-triage.yaml b/.github/workflows/claude-triage.yaml
deleted file mode 100644
index f53bb84..0000000
--- a/.github/workflows/claude-triage.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
-name: 'Claude: Triage'
-
-on:
-  issues:
-    types: [opened, reopened]
-  workflow_dispatch:
-    inputs:
-      issue_number:
-        description: 'Issue number to (re-)triage'
-        required: true
-        type: string
-
-permissions:
-  contents: read
-  issues: write
-  id-token: write  # Required by anthropics/claude-code-action for OIDC auth
-
-concurrency:
-  group: claude-triage-${{ github.event.issue.number || inputs.issue_number }}
-  cancel-in-progress: true
-
-jobs:
-  triage:
-    # Only Oddly (the maintainer) can cause this workflow to do any work —
-    # issue events triggered by anyone else, or workflow_dispatch invocations
-    # by any other collaborator, get gated out. Combined with the
-    # `issues: [opened, reopened]` trigger this means: issues the maintainer
-    # opens or reopens auto-triage, and no one else can force it. Manual
-    # re-triage of any issue goes through `gh workflow run`, run by Oddly.
-    if: github.actor == 'Oddly'
-    # Self-hosted runners live on incus-ci LXC 305 inside the lab's
-    # 172.30.0.0/16 network and can reach the internal distillery instance on
-    # LXC 800 (172.30.0.62:8000). github.com-hosted runners cannot.
-    runs-on: self-hosted
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
-        with:
-          fetch-depth: 1
-
-      - name: Load triage prompt
-        id: prompt
-        env:
-          # Passed through env (not inlined into the script) so the value is
-          # never interpreted as shell. workflow_dispatch has no issue payload,
-          # so fall back to the manual input.
-          ISSUE_NUMBER: ${{ github.event.issue.number || inputs.issue_number }}
-        run: |
-          # Multiline step outputs use the `name<<DELIMITER` ... `DELIMITER`
-          # form; the delimiter must not occur on a line of its own in the
-          # prompt file.
-          {
-            echo 'body<<TRIAGE_PROMPT_EOF'
-            cat .github/prompts/triage-distillery.md
-            echo
-            echo "Issue to triage: #${ISSUE_NUMBER}"
-            echo 'TRIAGE_PROMPT_EOF'
-          } >> "$GITHUB_OUTPUT"
-
-      - name: Write distillery MCP config
-        run: |
-          cat > "${RUNNER_TEMP}/distillery-mcp.json" <<'EOF'
-          {
-            "mcpServers": {
-              "distillery": {
-                "type": "http",
-                "url": "http://172.30.0.62:8000/mcp"
-              }
-            }
-          }
-          EOF
-
-      - name: Run Claude
-        uses: anthropics/claude-code-action@b47fd721da662d48c5680e154ad16a73ed74d2e0 # v1
-        with:
-          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-          prompt: ${{ steps.prompt.outputs.body }}
-          # Keep full output off: when enabled it prints Claude's tool call
-          # inputs, outputs, and reasoning into the public Actions log. Flip
-          # to true only for a single debug triage, then revert.
-          show_full_output: false
-          claude_args: >-
-            --mcp-config ${{ runner.temp }}/distillery-mcp.json
-            --allowedTools "Read,Grep,Glob,Bash(git:*),Bash(gh issue:*),Bash(gh search:*),Bash(gh api repos/Oddly/elasticstack/contents/*),mcp__distillery__distillery_search"