From d7797fdb89a4f76682e3800d1b93015409fae1d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Gosta=C5=84ski?= Date: Thu, 18 Jun 2026 21:52:26 +0200 Subject: [PATCH 1/3] feat(researcher): add source-grounded research report plugin An interactive front-end skill gathers a research brief and launches a bundled Dynamic Workflow that fans out firecrawl retrieval (WebSearch fallback) into cited findings, gates rounds on coverage and contradictions, synthesizes once, edits for the audience, and renders an evolving, source-grounded HTML report where every claim traces to a numbered source. - workflows/research.js: Setup -> Plan (distinct sub-queries) -> assessor-gated round loop (parallel retrievers + Conflict-scout + deep-only Verifier feeding a single Assessor) -> Synthesizer (once) -> Editor (audience) -> Persist (state.json + snapshot, retried) -> Composer (retryable HTML render) - skills/research/SKILL.md: infer-first brief, slug/folder resolution (no index; per-report state.json is the registry), scriptPath launch, follow-up checkpoint - assets/: pinned chart.umd.js (4.5.0) + shipped report.css (fixed class vocab) - single evolving report: append-only source ids and content artifacts; prior output.html snapshotted before each overwrite - register researcher v0.1.0 in the marketplace - design captured in CONTEXT.md + docs/adr/0001-0009 --- .claude-plugin/marketplace.json | 18 + plugins/researcher/.claude-plugin/plugin.json | 17 + plugins/researcher/CHANGELOG.md | 35 + plugins/researcher/CONTEXT.md | 193 +++++ plugins/researcher/README.md | 118 +++ plugins/researcher/assets/VENDOR.md | 18 + plugins/researcher/assets/chart.umd.js | 14 + plugins/researcher/assets/report.css | 258 ++++++ .../0001-skill-launched-dynamic-workflow.md | 17 + ...0002-custom-workflow-over-deep-research.md | 18 + .../docs/adr/0003-two-tier-research-loop.md | 21 + ...tml-report-via-editor-composer-pipeline.md | 35 + .../docs/adr/0005-single-evolving-report.md | 19 + 
...ning-core-gated-loop-single-synthesizer.md | 52 ++ ...hartjs-diagrams-mmdc-svg-agent-readable.md | 57 ++ ...ed-report-css-semantic-class-vocabulary.md | 50 ++ ...l-only-bash-rewrite-append-only-content.md | 56 ++ plugins/researcher/docs/diagrams/workflow.mmd | 84 ++ plugins/researcher/docs/diagrams/workflow.svg | 1 + plugins/researcher/skills/research/SKILL.md | 127 +++ plugins/researcher/workflows/research.js | 768 ++++++++++++++++++ 21 files changed, 1976 insertions(+) create mode 100644 plugins/researcher/.claude-plugin/plugin.json create mode 100644 plugins/researcher/CHANGELOG.md create mode 100644 plugins/researcher/CONTEXT.md create mode 100644 plugins/researcher/README.md create mode 100644 plugins/researcher/assets/VENDOR.md create mode 100644 plugins/researcher/assets/chart.umd.js create mode 100644 plugins/researcher/assets/report.css create mode 100644 plugins/researcher/docs/adr/0001-skill-launched-dynamic-workflow.md create mode 100644 plugins/researcher/docs/adr/0002-custom-workflow-over-deep-research.md create mode 100644 plugins/researcher/docs/adr/0003-two-tier-research-loop.md create mode 100644 plugins/researcher/docs/adr/0004-html-report-via-editor-composer-pipeline.md create mode 100644 plugins/researcher/docs/adr/0005-single-evolving-report.md create mode 100644 plugins/researcher/docs/adr/0006-reasoning-core-gated-loop-single-synthesizer.md create mode 100644 plugins/researcher/docs/adr/0007-charts-vendored-chartjs-diagrams-mmdc-svg-agent-readable.md create mode 100644 plugins/researcher/docs/adr/0008-presentation-shipped-report-css-semantic-class-vocabulary.md create mode 100644 plugins/researcher/docs/adr/0009-snapshot-mechanics-html-only-bash-rewrite-append-only-content.md create mode 100644 plugins/researcher/docs/diagrams/workflow.mmd create mode 100644 plugins/researcher/docs/diagrams/workflow.svg create mode 100644 plugins/researcher/skills/research/SKILL.md create mode 100644 plugins/researcher/workflows/research.js diff --git 
a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 249fca8..0faf42f 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -151,6 +151,24 @@ "clean-code", "refactoring" ] + }, + { + "name": "researcher", + "source": "./plugins/researcher", + "description": "Source-grounded research assistant: an interactive skill gathers a research brief, then launches a bundled Dynamic Workflow that fans out firecrawl retrieval (WebSearch fallback) into cited findings, gates rounds on coverage and contradictions, and renders a self-contained HTML report where every claim traces to a numbered source. The report evolves across follow-up runs.", + "version": "0.1.0", + "author": { + "name": "Mateusz Gostański (grixu)", + "email": "mateusz.gostanski@gmail.com" + }, + "category": "productivity", + "tags": [ + "research", + "web", + "firecrawl", + "citations", + "workflow" + ] } ] } diff --git a/plugins/researcher/.claude-plugin/plugin.json b/plugins/researcher/.claude-plugin/plugin.json new file mode 100644 index 0000000..4853812 --- /dev/null +++ b/plugins/researcher/.claude-plugin/plugin.json @@ -0,0 +1,17 @@ +{ + "name": "researcher", + "version": "0.1.0", + "description": "Source-grounded research assistant: an interactive skill gathers a research brief, then launches a bundled Dynamic Workflow that fans out firecrawl retrieval (WebSearch fallback) into cited findings, gates rounds on coverage and contradictions, and renders a self-contained HTML report where every claim traces to a numbered source. 
The report evolves across follow-up runs.", + "author": { + "name": "Mateusz Gostański (grixu)", + "email": "mateusz.gostanski@gmail.com" + }, + "repository": "https://github.com/grixu/cc-toolkit", + "keywords": [ + "research", + "web", + "firecrawl", + "citations", + "workflow" + ] +} diff --git a/plugins/researcher/CHANGELOG.md b/plugins/researcher/CHANGELOG.md new file mode 100644 index 0000000..53435d0 --- /dev/null +++ b/plugins/researcher/CHANGELOG.md @@ -0,0 +1,35 @@ +# Changelog + +All notable changes to the **researcher** plugin will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- `research` orchestrator skill — gathers a brief (infer-first; one consolidated prompt over only the dimensions it + can't infer: depth, recency, sources, audience), resolves the report folder (slugified goal; no index file — the + per-report `state.json` files are the registry), launches the bundled Dynamic Workflow, presents the manifest + + path, and runs the follow-up checkpoint that extends the evolving report. +- `workflows/research.js` — the bundled Dynamic Workflow: + - **Setup** — loads a prior `state.json` when extending (with a `schemaVersion` guard), preflights `mmdc`. + - **Plan** — derives distinct sub-query angles so parallel retrievers don't converge on the same hit. + - **Assessor-gated round loop** — parallel firecrawl retrievers (WebSearch fallback) emit findings with typed, + verbatim evidence spans and per-source trust tiers; deterministic dedup-by-URL assigns append-only source ids; a + **Conflict-scout** (every round) and, on a deep brief, an adversarial **Verifier** feed the **Assessor**, the + loop's single gate. Bounded by a per-depth round cap (quick=1 / standard=2 / deep=3), not a token budget. 
+ - **Synthesizer** — runs once on green light; reconciles findings, surfaces residual contradictions with + attribution, composes an audience-neutral cited draft. + - **Editor** — the sole audience-aware stage; re-cuts for concision and the brief's audience tier and marks + earn-their-place visuals. + - **Composer** — renders an HTML report (semantic HTML against a fixed class vocabulary), snapshots the prior + version, copies the shipped assets, compiles diagrams, renders Chart.js charts, and writes `state.json`. Returns + only the path + a compact manifest. +- Shipped assets — a version-pinned `chart.umd.js` (Chart.js 4.5.0) and a `report.css` (system fonts, light/dark, + ~70ch column, sticky ToC sidebar, print styles), copied into each report so it stays offline and self-contained. +- Single **evolving** HTML report per topic with sidecar `diagrams/`, `assets/`, and `snapshots/` folders; source ids + are append-only and content artifacts are written append-only so older snapshots keep resolving. + +[Unreleased]: https://github.com/grixu/cc-toolkit diff --git a/plugins/researcher/CONTEXT.md b/plugins/researcher/CONTEXT.md new file mode 100644 index 0000000..b679676 --- /dev/null +++ b/plugins/researcher/CONTEXT.md @@ -0,0 +1,193 @@ +# Researcher + +A Claude Code plugin that answers a research question with a **source-grounded, cited report**. +An interactive front-end skill gathers the brief, then launches a bundled Dynamic Workflow that +fans out firecrawl-backed retrieval subagents (WebSearch fallback) into findings, gates rounds on +coverage and contradictions, then synthesises a cited answer where every claim is traceable to a +numbered source — rendered as an HTML report. + +## Language + +**Research brief**: +The structured spec the front-end skill produces from the user's question and answers — the goal +plus scope, depth, recency, source, and **audience** constraints. It is what drives a workflow run. 
+The **audience** is one of four coarse expertise tiers — `lay`, `informed`, `practitioner`, or +`expert` (`practitioner` = in the field but junior: knows the basics, not advanced terms or +abbreviations) — optionally refined by a one-line free-form descriptor (e.g. "a PM evaluating vendors"). It calibrates the +**Editor** *only* (no other stage reads it); the skill infers the tier from the question and available +context (e.g. a global CLAUDE.md), and only asks the user outright when it cannot. +_Avoid_: query, prompt + +**Source**: +A web resource (URL) discovered during retrieval, deduplicated by URL and assigned a stable +**numeric id** used for citation, plus a coarse **trust tier** — `primary/official` > +`reputable-secondary` > `community/unverified` — set by the **Retriever** at fetch. The tier lets the +**Assessor** dismiss noise conflicts (official docs vs a stale blog) without a round, the +**Synthesizer** weigh sources when reconciling, and the **Report's** Sources list show the reader the +basis. Corroboration (how many independent sources back a finding) is a further, emergent signal — not +a substitute for the tier. Ids are append-only across the evolving **Report**: follow-up runs add new +sources with new ids and never renumber existing ones, so inline citations stay valid. +_Avoid_: link, reference + +**Finding**: +A discrete claim extracted from one or more sources, tagged with the source ids it came from *and* a +typed **evidence span**. Each span has a `kind`: a `quote` (verbatim source text — the default, and the +only kind a cheap deterministic string-check against the scrape can confirm), an `image_region` (url + +alt/caption, for charts and infographics), or a `locator` (page/timestamp + the retriever's paraphrase, +for paywalled or non-text sources). 
Non-text kinds are explicitly **non-verbatim**, so the fidelity +guarantee degrades *visibly* rather than silently — a `quote` is audited by construction; anything else +announces that it isn't. +_Avoid_: result, fact + +**Report**: +The deliverable — an HTML document backed by two sidecar folders, `diagrams/` (`.mmd` sources + +compiled SVGs) and `assets/` (the vendored chart library, the shipped `report.css`, and any +irreplaceable images), carrying the stated **goal**, a numbered list of **sources**, and an **answer** whose claims cite source ids inline +(e.g. "… happened in 2024 [2][5]"). Its body is kept readable by humans *and* agents — it is routinely +consumed as documentation, so heavy artifacts go to those sidecars and the body keeps only their +references and semantic pointers. It evolves across follow-up runs rather than spawning a new file each time: every +run merges its new **Findings** and the **Editor** re-cuts the whole **answer**, so the document stays +concise as it grows. The prior file is snapshotted before each overwrite. +_Avoid_: summary, output + +**Research round**: +One pass of plan → parallel retrieval → extract → dedup (by URL, assign append-only ids) → +**conflict-scout** (a *deep* brief inserts a **Verifier** pass next). Merge is *not* a per-round step — +it is deferred to the **Synthesizer**, which runs once after the **Assessor** green-lights coverage. +Rounds run in two tiers: *within* a workflow run the +**assessor** gates them autonomously; *between* runs the user steers via follow-up questions. Bounded +by a hard round cap per depth — *not* a token budget (Claude Code does not reliably expose one). Both a +**Conflict-scout** conflict and a **Verifier** refutation that needs fresh evidence become gaps filled +by the *same* round retrieval — there is one information-pulling mechanism, not several. +_Avoid_: iteration + +**Retriever**: +A workflow subagent that fetches from an external source. 
Today: **firecrawl** (search/scrape), +with **WebSearch** as fallback. Designed to admit more retrievers later (Perplexity, Gemini deep +research) without changing the report contract. Alongside text, a retriever records **candidate +image URLs** from each page (so the Composer can later fetch the ones worth including) and assigns each +**Source** its coarse **trust tier** at fetch. +_Avoid_: scraper, crawler, fetcher + +**Assessor** (coverage assessor): +The loop's single gate. A subagent that, after a round, judges whether more research is needed — +weighing subject complexity, accumulated context size, explicit user intent (a "deep research" brief +biases toward more rounds), the **Conflict-scout's** `conflicts[]`, and — on a deep brief — the +**Verifier's** unresolved refutations. It judges each conflict's/refutation's **materiality** against +the brief's goal and planned sub-questions (a material, resolvable one is itself a gap). It green-lights +only when coverage is sufficient *and* no material, resolvable conflict or refutation remains; +otherwise it emits the gaps plus proposed follow-up questions. +_Avoid_: evaluator, critic + +**Conflict-scout**: +A subagent that runs each round after dedup, *before* the **Assessor**: it diffs the accumulated +**Findings** for contradictions and emits `conflicts[]`, each tagging the clashing finding/source ids +with a *hint* at whether it is resolvable by more retrieval. **Materiality is not the scout's call** — +the **Assessor** judges it against the brief's goal and planned sub-questions (no drafted answer exists +yet). The scout only detects — it neither gates the loop (the **Assessor** does) nor writes prose (the +**Synthesizer** does), and it compares claims against *each other*, not ground truth, so it is no +fact-checker. +_Avoid_: verifier, fact-checker, referee + +**Verifier**: +A depth-gated adversarial subagent — runs only on a *deep* brief, each round, after the +**Conflict-scout**. 
It tries to *refute* the material **Findings**, reasoning over the already-gathered +corpus (other findings, source **trust tiers**, internal logic) — it does **not** fetch itself. A +finding it can refute outright is dropped; one it cannot settle without fresh counter-evidence becomes +a gap the **Assessor** acts on (filled by an ordinary round, the same mechanism conflicts use). It +judges *truth/reliability*, where the **Conflict-scout** judges only mutual *consistency*. Quick and +standard briefs skip it. +_Avoid_: fact-checker, skeptic, critic + +**Synthesizer**: +The reasoning core. A subagent that runs **once**, only after the **Assessor** green-lights coverage: +it reads the full deduplicated **Findings** and **Sources**, reconciles findings where they can be +reconciled, surfaces with attribution the residual contradictions the **Conflict-scout** flagged as +irreducible (it never hides them), and composes the structured draft **answer** with inline `[id]` +citations. It composes **audience-neutral** — the full, faithful argument with all its nuance and +caveats, never pre-trimmed for a reader; adapting it to the brief's audience is the **Editor's** job. +Turning gathered claims into a coherent, cited argument is its work — distinct from the **Editor**, +which adapts that draft to the reader and cuts and clarifies it afterward. On a follow-up run it +re-synthesizes the *whole* answer from the accumulated findings (holistic, per ADR-0005), not just the +new material. +_Avoid_: writer, merger, aggregator + +**Editor**: +A subagent that adapts the draft **answer** to its reader before it is rendered — the **sole +audience-aware stage**. 
Guided by the brief's audience tier (`lay` / `informed` / `practitioner` / `expert`) it sets how +much jargon to define, how much prior knowledge to assume, and how dense to write: for `lay` it defines +terms inline, leads with intuition, and cuts expert-only nuance; for `informed` it assumes general +literacy but defines field-specific terms; for `practitioner` it assumes the basics yet still defines +advanced terms and expands abbreviations on first use; for `expert` it assumes the terminology (jargon +and abbreviations), trims background, and foregrounds caveats and edge cases. Throughout it adversarially cuts +redundancy and filler — without flattening the **Findings'** accuracy. It also marks where a visual (a +Mermaid diagram, table, or chart) would carry an idea better than prose — leaning on more of them for a +`lay` reader, fewer for an `expert` — and specifies what it should show, so the **Composer** renders +only visuals that earn their place. Independent of +whoever drafted the answer, so the cutting is a second pair of eyes, not self-grading. +_Avoid_: proofreader, summarizer + +**Composer**: +The final stage of a workflow run — a subagent that renders the editor-approved **answer** as the +HTML **Report**: it builds the linear document (table of contents + anchors) and keeps the HTML body +**readable by humans and agents alike** — heavy artifacts live in the sidecar `diagrams/` and `assets/` +folders and the body carries only their references, since the Report is itself read as documentation. +It does not design: the body is semantic HTML against a fixed class vocabulary, styled by a shipped +`report.css` (copied into `assets/`, linked relatively), so every Report and snapshot shares one +identity and the head carries no bespoke `

Dynamic Workflow — subagenty (do wątku głównego wraca tylko ścieżka + manifest)

2. Dobieranie kontekstu — front-end skill (wątek główny)

Research round — równolegle (×N)

kolejna runda
(cap rund per głębokość, bez budżetu tokenów)

pokrycie OK

user wybiera/dodaje pytania →
nowy run rozszerza TEN SAM
ewoluujący Report

koniec

1. Input: pytanie researchowe

Subagent Explore:
codebase

Subagent Explore:
dokumentacja

CLAUDE.md
(inferencja audience)

Zadanie:
Jira / Linear MCP

Merge →
Research brief
(+ audience)

3. Plan: podział na pod-pytania
+ ustalenie zależności

Retriever: firecrawl
(WebSearch fallback)

Retriever: firecrawl

Retriever: firecrawl

Findings + kandydaci
na obrazki [src ids]

Findings [src ids]

Findings [src ids]

Merge + cross-check →
answer (findings)

4. Assessor:
czy brakuje pokrycia?
5. Editor: tnij lanie wody,
de-jargon pod audience,
oznacz wizualizacje
6. Composer: HTML report
ToC + [n] + mmdc→SVG
+ assets, snapshot, state
7. Prezentacja: manifest inline
+ ścieżka + oferta otwarcia

Follow-up?

Koniec

\ No newline at end of file diff --git a/plugins/researcher/skills/research/SKILL.md b/plugins/researcher/skills/research/SKILL.md new file mode 100644 index 0000000..ab9e06b --- /dev/null +++ b/plugins/researcher/skills/research/SKILL.md @@ -0,0 +1,127 @@ +--- +name: research +description: "Produce a source-grounded, cited HTML research report on a question. Gathers a brief (depth, recency, source mix, audience) then launches a bundled Dynamic Workflow that fans out firecrawl retrieval into findings, gates rounds on coverage + contradictions, synthesizes a cited answer, and renders an evolving HTML report. Use for substantive research requests: 'research X', 'zbadaj/zresearchuj X', 'do a deep dive on', 'find out everything about', 'write me a report on', 'compare A vs B with sources', 'what does the evidence say about'. Invoke explicitly as /researcher:research \"<question>\". Requires Dynamic Workflows enabled + the firecrawl MCP." +argument-hint: "<question>" +disable-model-invocation: true +allowed-tools: Workflow AskUserQuestion Read Bash(echo:*) Bash(ls:*) Bash(test:*) Bash(cat:*) Bash(find:*) Bash(open:*) Bash(xdg-open:*) +--- + +# research — source-grounded research report + +Front-end orchestrator for the `researcher` workflow. Gather a brief, resolve where the report lives, launch the +bundled Dynamic Workflow, present the result, then run the follow-up checkpoint that extends the **evolving** report. + +The workflow does all the heavy lifting (retrieval, the gated round loop, synthesis, editing, HTML rendering) and +returns only a compact manifest + path — the verbose HTML never enters this conversation. + +## Context + +- Plugin root: !`echo "$CLAUDE_PLUGIN_ROOT"` +- Default output base: `./research` (one folder per report: `./research/<slug>/`) + +## 0. 
Prerequisites (check once, fail clearly) + +This skill needs **Dynamic Workflows** enabled (Claude Code v2.1.154+, paid plan; on Pro they must be enabled +per-session) and the **firecrawl MCP** installed with `mcp__firecrawl__firecrawl_search` / `firecrawl_scrape` +allow-listed. If the `Workflow` tool is unavailable, tell the user to enable Dynamic Workflows and stop — do not +attempt an inline `Agent` fan-out (that defeats the whole design; see the plugin README). + +## 1. Get the question + +- Take the question from `$ARGUMENTS`. If empty, ask the user what they want researched, then continue. + +## 2. Resolve the brief (infer first, ask once) + +Infer every dimension from the question and context; **only ask about what you genuinely cannot infer**, in a +**single** consolidated `AskUserQuestion` call (the tool allows ≤4 sub-questions). Often you will ask 1 question or +none. Never spend a sub-question on language — it is deterministically detectable (see below). + +Dimensions and their option sets (list the inferred/sensible default **first**, label it `(Recommended)`): + +| Dimension | Options (recommended first varies by inference) | Effect | +|---|---|---| +| **Depth** | Standard (balanced, 2 rounds) · Quick (1 round) · Deep (3 rounds + Verifier) | sets `maxRounds`; Deep also runs the adversarial Verifier | +| **Recency** | Recent (~2 years) · Any (incl. 
foundational) · Latest (fast-moving, newest first) | biases retriever date filters + query terms | +| **Sources** | Broad (all types, trust-weighted) · Authoritative (primary + reputable only) · Technical-academic (docs/standards/papers) | biases sub-query planning + inclusion | +| **Audience** | Informed · Lay · Practitioner · Expert | calibrates the Editor only | + +**Audience tiers** (`lay` / `informed` / `practitioner` / `expert`): `lay` = general public; `informed` = generally +literate, not a specialist; `practitioner` = in the field but junior (knows the basics, not advanced terms or +abbreviations); `expert` = fluent in the jargon. Infer from the question's framing and any context (e.g. a project +CLAUDE.md). You may also pass a one-line free-form `descriptor` (e.g. "a PM evaluating vendors") to sharpen it. + +**Language** (decision: never ask): default = **the language of the question**. Detect it from the question text. +An explicit in-question directive overrides it (e.g. "…napisz raport po angielsku" / "…in English" → English even +if the question is Polish). Pass a clear language name (e.g. `Polish`, `English`). + +Smart defaults when you choose not to ask: depth `standard`, recency `any` (or `recent`/`latest` if the topic is +clearly time-sensitive), sources `broad`, audience `informed`. + +## 3. Resolve the report folder (no index file — the per-report `state.json` files are the registry) + +Reports live under `<outputBase>/<slug>/`. There is **no global index** — discover existing reports by scanning the base +directory; each `state.json` is self-describing (`goal`, `brief`). + +1. **Within this session** — if you just finished a run and the user is following up, **reuse the folder path you + already hold** (set `extending: true`, same `slug`). No scan, no prompt. +2. **Fresh / new session** — derive `slug` = a slugified `goal` (lowercase, words joined by `-`). Then: + - `test -f <outputBase>/<slug>/state.json`? 
If it exists, read its `goal`: + - **same topic** → tell the user a report already exists and offer **Extend** (continue it) vs **Fresh** (new report). Extend → `extending: true`. Fresh on a same-slug-different-angle → pick the next free `<slug>-2`, `<slug>-3`, … + - **different goal** sharing the slug (collision) → use `<slug>-2` (etc.) for the new report. + - To extend a **different** existing report, scan `<outputBase>/*/state.json`, list their goals, and let the user pick which to continue. +3. New report → `extending: false`. + +(`mkdir` for the folder is handled by the workflow's Setup step — you only resolve the path + the extend flag.) + +## 4. Launch the workflow + +Resolve the plugin root from the Context block above (call it `PLUGIN_ROOT`). Call the **Workflow** tool with the +bundled script by path and the brief as a real JSON object (not a stringified one): + +``` +Workflow({ + scriptPath: "<PLUGIN_ROOT>/workflows/research.js", + args: { + goal: "<the research question>", + depth: "quick|standard|deep", + recency: "recent|any|latest", + sources: "broad|authoritative|technical-academic", + audience: { tier: "lay|informed|practitioner|expert", descriptor: "<optional one-line descriptor>" }, + language: "<language name>", + outputBase: "./research", + slug: "<slug>", + extending: <true|false>, + pluginRoot: "<PLUGIN_ROOT>" + } +}) +``` + +`pluginRoot` must be the resolved absolute path (the Composer copies `report.css` + `chart.umd.js` from +`<pluginRoot>/assets`). If the tool rejects `scriptPath`, fall back to `Read`-ing the file and passing its contents +as `script:` with the same `args`. + +The workflow may return an `error` field instead of a report (`no-goal`, `schema-mismatch`, `no-findings`, +`synthesis-failed`, `compose-failed`). Relay its `message` plainly and stop — do not retry blindly. + +## 5. Present the result + +On success the workflow returns `{ artifactPath, manifest: { title, sections[], sourceCount, roundCount }, gaps[], followups[], warnings[] }`. + +- Print the manifest **inline**: the title, the section list, and the source/round counts. +- Print the `artifactPath`. 
+- Surface any `warnings` (e.g. an image that failed to fetch, diagrams skipped because `mmdc` was unavailable). +- **Offer** to open it — do not auto-open: suggest `open ""` on macOS (`xdg-open` on Linux). + +## 6. Follow-up checkpoint (extend the evolving report) + +If `followups[]` is non-empty, run a blocking `AskUserQuestion` (`multiSelect: true`) listing the proposed follow-up +questions as individually selectable options, and let the user add their own (the auto-added "Other"). Then: + +- If the user selects/adds questions → build the next brief (their selection becomes the new `goal`; **inherit the + same `slug`, `extending: true`, and the prior `brief` defaults** so the report stays one coherent, single-language + document) and relaunch the workflow (step 4). The report is re-synthesized holistically and snapshotted before the + overwrite — this is the same evolving `output.html`, not a new file. +- If the user is done → stop. + +Mirror the calm, decision-handing-back style of the other orchestrator skills: surface and offer, don't unilaterally +declare the research "complete". diff --git a/plugins/researcher/workflows/research.js b/plugins/researcher/workflows/research.js new file mode 100644 index 0000000..27e2721 --- /dev/null +++ b/plugins/researcher/workflows/research.js @@ -0,0 +1,768 @@ +export const meta = { + name: 'researcher', + description: + 'Source-grounded research: fan out firecrawl retrievers (WebSearch fallback) into cited findings, gate rounds on coverage + contradictions (Conflict-scout + deep-only Verifier feeding a single Assessor), synthesize once, edit for the audience, and render an evolving HTML report. 
Returns only the artifact path + a compact manifest.', + phases: [ + { title: 'Setup', detail: 'load prior state.json (if extending), schemaVersion check, mmdc preflight' }, + { title: 'Plan', detail: 'derive distinct sub-query angles from the brief (diversity)' }, + { title: 'Research', detail: 'assessor-gated rounds: parallel retrievers → Conflict-scout → (deep) Verifier → Assessor' }, + { title: 'Synthesize', detail: 'one terminal Synthesizer reconciles findings into a cited, audience-neutral draft' }, + { title: 'Edit', detail: 'Editor re-cuts for concision + the brief audience tier; marks earn-their-place visuals' }, + { title: 'Compose', detail: 'Composer snapshots, copies assets, renders semantic HTML + diagrams/charts, writes state.json' }, + ], +} + +// ───────────────────────────── args / brief ────────────────────────────────── +// AGENTS-NOTE: args sometimes arrives JSON-encoded as a string instead of an object; a naive typeof +// check then drops the whole payload and silently runs against defaults. Mirror multi-skill-review.js. +let a = {} +if (args && typeof args === 'object') a = args +else if (typeof args === 'string') { try { a = JSON.parse(args) } catch { a = {} } } + +const SCHEMA_VERSION = 1 // bump when state.json shape changes; setup refuses a mismatched prior state + +// slugify is a script-side fallback only — the skill normally resolves and passes `slug` (decision J). +const slugify = (s) => String(s || 'research') + .toLowerCase().normalize('NFKD').replace(/[^\w\s-]/g, '') + .trim().replace(/[\s_-]+/g, '-').replace(/^-+|-+$/g, '').slice(0, 60) || 'research' + +const brief = { + goal: a.goal || (a.brief && a.brief.goal) || '', + depth: ['quick', 'standard', 'deep'].includes(a.depth) ? a.depth : 'standard', + recency: ['recent', 'any', 'latest'].includes(a.recency) ? a.recency : 'any', + sources: ['broad', 'authoritative', 'technical-academic'].includes(a.sources) ? 
a.sources : 'broad', + audience: { + tier: ['lay', 'informed', 'practitioner', 'expert'].includes(a.audience && a.audience.tier) + ? a.audience.tier : 'informed', + descriptor: (a.audience && a.audience.descriptor) || '', + }, + language: a.language || 'en', // the SKILL detects the question's language and passes it (decision K) +} + +const OUTPUT_BASE = a.outputBase || './research' +const SLUG = a.slug || slugify(brief.goal) +const REPORT_DIR = `${OUTPUT_BASE}/${SLUG}` +const EXTENDING = !!a.extending +const PLUGIN_ROOT = a.pluginRoot || '' // ${CLAUDE_PLUGIN_ROOT}; the Composer copies assets/ from here + +// Round budget per depth (ADR-0003: bounded by rounds, NOT a token budget). Fan-out cap is TUNABLE +// (PLAN §8 — settle during build); the probe ran 5 parallel retrievers comfortably. +const MAX_ROUNDS = ({ quick: 1, standard: 2, deep: 3 })[brief.depth] +const FANOUT = Number.isFinite(a.fanout) ? a.fanout : ({ quick: 3, standard: 5, deep: 6 })[brief.depth] +const RUN_VERIFIER = brief.depth === 'deep' // Verifier is depth-gated (ADR-0006) + +if (!brief.goal) return { error: 'no-goal', message: 'args.goal (the research question) is required.' } + +// ───────────────────────────── shared contracts (schemas) ──────────────────── +// These are the INTERNAL structured contract every stage passes (PLAN §4). Heavy HTML never travels +// through them — only structured findings/answer. Trust tiers + typed evidence spans are first-class. + +const TRUST_TIERS = ['primary', 'reputable-secondary', 'community'] // set by the Retriever at fetch +const EVIDENCE_KINDS = ['quote', 'image_region', 'locator'] // only `quote` is verbatim/string-checkable + +// One retriever's raw output. The dedup step (script-side, deterministic) assigns the global +// append-only `source_id` and flattens findings — so retrievers do NOT invent global ids. 
const RETRIEVER_SCHEMA = (() => {
  // Built innermost-first so each level of the contract can be read on its own.

  // One evidence span backing a claim.
  const evidenceSpan = {
    type: 'object',
    required: ['kind', 'value'],
    properties: {
      kind: { type: 'string', enum: EVIDENCE_KINDS },
      value: { type: 'string', description: 'a `quote` is VERBATIM source text; `image_region` = url + alt/caption; `locator` = page/timestamp + paraphrase' },
    },
  }

  // One claim extracted from a source, with the spans that support it.
  const finding = {
    type: 'object',
    required: ['claim', 'evidence'],
    properties: {
      claim: { type: 'string', description: 'one discrete factual claim' },
      evidence: { type: 'array', items: evidenceSpan },
    },
  }

  // One fetched source; it carries no global id at this stage.
  const source = {
    type: 'object',
    required: ['url', 'title', 'access_date', 'trust_tier', 'findings'],
    properties: {
      url: { type: 'string' },
      title: { type: 'string' },
      access_date: { type: 'string', description: 'UTC date the source was fetched, YYYY-MM-DD (from `date -u`)' },
      trust_tier: { type: 'string', enum: TRUST_TIERS },
      candidate_image_urls: { type: 'array', items: { type: 'string' }, description: 'chart/infographic URLs worth considering for the report' },
      findings: { type: 'array', items: finding },
    },
  }

  return {
    type: 'object',
    required: ['sources'],
    properties: {
      sources: { type: 'array', items: source },
      notes: { type: 'string', description: 'optional: retrieval issues, e.g. "firecrawl unsupported on X, used WebSearch"' },
    },
  }
})()

// Persisted + in-flight research state. A finding here is the FLAT form (carries global source_ids[]).
const STATE_SCHEMA = (() => {
  // A deduplicated source; `source_id` is the only field the rest of the state refers to.
  const storedSource = {
    type: 'object',
    required: ['source_id', 'url', 'title', 'trust_tier'],
    properties: {
      source_id: { type: 'integer' },
      url: { type: 'string' },
      title: { type: 'string' },
      access_date: { type: 'string' },
      trust_tier: { type: 'string', enum: TRUST_TIERS },
      candidate_image_urls: { type: 'array', items: { type: 'string' } },
    },
  }

  // A flattened finding: the claim plus the source ids it cites.
  const storedFinding = {
    type: 'object',
    required: ['claim', 'source_ids', 'evidence'],
    properties: {
      claim: { type: 'string' },
      source_ids: { type: 'array', items: { type: 'integer' } },
      evidence: { type: 'array', items: { type: 'object', properties: { kind: { type: 'string' }, value: { type: 'string' } } } },
    },
  }

  return {
    type: 'object',
    required: ['schemaVersion', 'sources', 'findings', 'roundCount'],
    properties: {
      schemaVersion: { type: 'integer' },
      brief: { type: 'object' },
      goal: { type: 'string' },
      sources: { type: 'array', items: storedSource },
      findings: { type: 'array', items: storedFinding },
      answer: { type: ['string', 'null'] }, // stays null until a synthesis has been produced
      roundCount: { type: 'integer' },
    },
  }
})()

// Setup agent return: prior state (when extending) + environment preflight.
const SETUP_SCHEMA = {
  type: 'object',
  required: ['schemaOk', 'extending', 'mmdcRunner', 'diagramsAvailable'],
  properties: {
    schemaOk: {
      type: 'boolean',
      description: 'false iff a prior state.json exists with a schemaVersion this workflow cannot read',
    },
    extending: {
      type: 'boolean',
      description: 'true iff a usable prior state.json was loaded',
    },
    priorSchemaVersion: { type: ['integer', 'null'] },
    mmdcRunner: {
      type: 'string',
      description: 'command prefix to compile .mmd → .svg: "mmdc", "pnpm dlx @mermaid-js/mermaid-cli", "npx -y @mermaid-js/mermaid-cli", or "" if none',
    },
    diagramsAvailable: {
      type: 'boolean',
      description: 'false ⇒ Composer renders without diagrams + a note (graceful degrade)',
    },
    // Same shape as the persisted state, but nullable.
    state: { ...STATE_SCHEMA, type: ['object', 'null'] },
  },
}

// Planner return: the sub-query angles for the first round.
const PLAN_SCHEMA = (() => {
  const subQuery = {
    type: 'object',
    required: ['angle', 'query'],
    properties: {
      angle: { type: 'string', description: 'what facet this covers (e.g. "official spec", "criticism", "benchmarks 2024+")' },
      query: { type: 'string', description: 'the search query string' },
    },
  }
  return {
    type: 'object',
    required: ['subQueries'],
    properties: {
      interpretation: { type: 'string', description: 'one line: how you read the goal' },
      subQueries: {
        type: 'array',
        description: 'DISTINCT angles so parallel retrievers do not converge on the same hit',
        items: subQuery,
      },
    },
  }
})()

// Conflict-scout return: contradictions among findings, each with a resolvability hint.
const CONFLICT_SCHEMA = (() => {
  const conflict = {
    type: 'object',
    required: ['description', 'finding_ids', 'resolvable_hint'],
    properties: {
      description: { type: 'string', description: 'what contradicts what' },
      finding_ids: { type: 'array', items: { type: 'integer' } },
      source_ids: { type: 'array', items: { type: 'integer' } },
      resolvable_hint: { type: 'string', enum: ['likely', 'unlikely', 'unknown'], description: 'HINT only — materiality is the Assessor\'s call' },
    },
  }
  return {
    type: 'object',
    required: ['conflicts'],
    properties: {
      conflicts: { type: 'array', items: conflict },
    },
  }
})()

// Verifier return: one verdict per finding it chose to challenge.
const VERIFIER_SCHEMA = (() => {
  const refutation = {
    type: 'object',
    required: ['finding_id', 'verdict', 'basis'],
    properties: {
      finding_id: { type: 'integer' },
      verdict: { type: 'string', enum: ['stands', 'refuted', 'needs-evidence'], description: 'refuted ⇒ drop; needs-evidence ⇒ becomes a gap; stands ⇒ keep' },
      basis: { type: 'string', description: 'reasoning over the gathered corpus — the Verifier does NOT fetch' },
    },
  }
  return {
    type: 'object',
    required: ['refutations'],
    properties: {
      refutations: { type: 'array', items: refutation },
    },
  }
})()

// Assessor return: the loop gate's decision plus the gaps and follow-ups it produced.
const ASSESSOR_SCHEMA = {
  type: 'object',
  required: ['sufficient', 'gaps', 'followups'],
  properties: {
    sufficient: { type: 'boolean' },
    reasoning: { type: 'string' },
    gaps: {
      type: 'array',
      items: { type: 'string' },
      description: 'unmet coverage + material/resolvable conflicts + unresolved refutations — drive the next round',
    },
    followups: {
      type: 'array',
      items: { type: 'string' },
      description: 'ready-to-use follow-up questions for the human checkpoint',
    },
  },
}

// ───────────────────────────── helpers (deterministic, no agent) ─────────────
// Dedup retriever outputs by URL and assign GLOBAL append-only source ids, continuing from the prior
// run's max id (never renumber —
// ADR-0005/0006). Returns the merged {sources, findings} for the round.
/**
 * Folds one round of retriever outputs into the accumulating research state, in place.
 *
 * Sources are keyed by exact URL. A URL not seen before gets the next global `source_id`; a URL already
 * present keeps its id and only gains candidate image URLs it did not already list. Every finding with a
 * non-empty `claim` is appended in flat form, citing the id of the source it came from.
 *
 * @param {{sources: object[], findings: object[]}} state accumulating state; mutated and returned
 * @param {Array<object|null>} retrieverOutputs raw retriever returns; null or malformed entries are skipped
 * @returns {object} the same `state` object
 */
function mergeRound(state, retrieverOutputs) {
  // A state reloaded from disk may lack either list; repair it in place rather than crash.
  if (!Array.isArray(state.sources)) state.sources = []
  if (!Array.isArray(state.findings)) state.findings = []

  const byUrl = new Map(state.sources.map((s) => [s.url, s]))
  // Ignore non-integer ids so one bad prior record cannot turn every new id into NaN.
  const maxId = state.sources.reduce((m, s) => (Number.isInteger(s.source_id) ? Math.max(m, s.source_id) : m), 0)
  let nextId = maxId + 1

  for (const out of Array.isArray(retrieverOutputs) ? retrieverOutputs : []) {
    if (!out || !Array.isArray(out.sources)) continue
    for (const src of out.sources) {
      if (!src || !src.url) continue
      const images = Array.isArray(src.candidate_image_urls) ? src.candidate_image_urls : []
      let existing = byUrl.get(src.url)
      if (!existing) {
        existing = {
          source_id: nextId++,
          url: src.url,
          title: src.title || src.url,
          access_date: src.access_date || '',
          trust_tier: src.trust_tier || 'community',
          candidate_image_urls: [...images],
        }
        byUrl.set(src.url, existing)
        state.sources.push(existing)
      } else {
        // A source restored from a prior state.json may have no image list at all.
        if (!Array.isArray(existing.candidate_image_urls)) existing.candidate_image_urls = []
        for (const u of images) if (!existing.candidate_image_urls.includes(u)) existing.candidate_image_urls.push(u)
      }
      for (const f of Array.isArray(src.findings) ? src.findings : []) {
        if (!f || !f.claim) continue
        state.findings.push({ claim: f.claim, source_ids: [existing.source_id], evidence: Array.isArray(f.evidence) ? f.evidence : [] })
      }
    }
  }
  return state
}

// Compact text view of accumulated findings/sources for prompts that reason over the corpus.
// Findings carry a #index so the Conflict-scout / Verifier / Assessor can reference them.
/**
 * @param {{sources: object[], findings: object[]}} state
 * @returns {string} a SOURCES section (one line per source) followed by a FINDINGS section in which
 *   `#i` is the finding's current position in `state.findings`
 */
function corpusText(state) {
  const srcLine = (s) => ` [${s.source_id}] (${s.trust_tier}) ${s.title} — ${s.url}`
  const findLine = (f, i) => ` #${i} cites [${(f.source_ids || []).join('][')}]: ${f.claim}`
  return `SOURCES (${state.sources.length}):\n${state.sources.map(srcLine).join('\n')}\n\nFINDINGS (${state.findings.length}):\n${state.findings.map(findLine).join('\n')}`
}

// Synthesizer-only view: NO #index on findings (so the model never mistakes a finding's position for a
// citation) and an explicit list of the ONLY valid citation tokens — the SOURCE ids.
+function synthCorpus(state) { + const srcLine = (s) => ` [${s.source_id}] (${s.trust_tier}) ${s.title} — ${s.url}` + const findLine = (f) => ` - ${f.claim} (supported by ${(f.source_ids || []).map((i) => `[${i}]`).join('') || '[?]'})` + const ids = state.sources.map((s) => s.source_id).join(', ') + return `SOURCES — the ONLY valid citation ids are: ${ids}\n${state.sources.map(srcLine).join('\n')}\n\nEVIDENCE (cite the SOURCE id(s) shown after each item — never a finding's position):\n${state.findings.map(findLine).join('\n')}` +} + +// ───────────────────────────── Setup ───────────────────────────────────────── +const setupPrompt = ` +You are the SETUP step for a research-report workflow. Use Bash/Read only. Do NOT research anything. + +Report directory: ${REPORT_DIR} +Extending an existing report: ${EXTENDING} +This workflow's state schemaVersion: ${SCHEMA_VERSION} + +Do exactly this: +1. Ensure the directory exists: \`mkdir -p ${REPORT_DIR}/diagrams ${REPORT_DIR}/assets ${REPORT_DIR}/snapshots\`. +2. Prior state: if ${REPORT_DIR}/state.json exists, Read it. + - If its schemaVersion !== ${SCHEMA_VERSION}: return schemaOk=false, extending=false, priorSchemaVersion=, state=null (the orchestrator will refuse rather than corrupt an evolving report). Do not migrate. + - Else return schemaOk=true, extending=true, priorSchemaVersion=, and state = the FULL parsed object (schemaVersion, brief, goal, sources[], findings[], answer, roundCount). + - If the file does not exist: schemaOk=true, extending=false, state=null. +3. 
mmdc preflight — pick the runner the Composer will use to compile Mermaid .mmd → .svg, in this order: + - if \`command -v mmdc\` succeeds → mmdcRunner="mmdc", diagramsAvailable=true + - else if \`command -v pnpm\` succeeds → mmdcRunner="pnpm dlx @mermaid-js/mermaid-cli", diagramsAvailable=true + - else if \`command -v npx\` succeeds → mmdcRunner="npx -y @mermaid-js/mermaid-cli", diagramsAvailable=true + - else → mmdcRunner="", diagramsAvailable=false + Do NOT actually invoke npx/pnpm dlx (it would download a package) — only detect availability. +Return the structured object. Nothing else.` + +phase('Setup') +const setup = await agent(setupPrompt, { label: 'setup', phase: 'Setup', schema: SETUP_SCHEMA }) + +if (setup && setup.schemaOk === false) { + return { + error: 'schema-mismatch', + message: `Existing report at ${REPORT_DIR} has state schemaVersion ${setup.priorSchemaVersion}, but this workflow speaks v${SCHEMA_VERSION}. Refusing to corrupt it — start a fresh slug or migrate manually.`, + artifactPath: REPORT_DIR, + } +} + +const isExtending = !!(setup && setup.extending && setup.state) +const mmdcRunner = (setup && setup.mmdcRunner) || '' +const diagramsAvailable = !!(setup && setup.diagramsAvailable) + +// The accumulating research state (prior, when extending; else empty). Mutated in place by the loop. +const state = isExtending + ? { ...setup.state, schemaVersion: SCHEMA_VERSION } + : { schemaVersion: SCHEMA_VERSION, brief, goal: brief.goal, sources: [], findings: [], answer: null, roundCount: 0 } + +log(`${isExtending ? `Extending (${state.sources.length} prior sources, round ${state.roundCount})` : 'Fresh report'} · depth=${brief.depth} maxRounds=${MAX_ROUNDS} fanout=${FANOUT} · diagrams=${diagramsAvailable ? mmdcRunner : 'OFF'}`) + +// ───────────────────────────── Plan (distinct sub-queries) ─────────────────── +const planPrompt = ` +You plan the FIRST research round for this brief. 
Produce up to ${FANOUT} DISTINCT sub-query angles so that +parallel retrievers do not all converge on the same popular result (a known failure mode). Cover different +facets — official/spec sources, independent analysis, criticism/limitations, recent developments, data/benchmarks — +as fits the goal. Do NOT retrieve anything; only plan. + +GOAL: ${brief.goal} +BRIEF: depth=${brief.depth}, recency=${brief.recency}, sourcePreference=${brief.sources}, language=${brief.language} +${isExtending ? `\nThis EXTENDS an existing report. Bias the angles toward GAPS and newer material, not what is already covered:\n${corpusText(state)}` : ''} + +Recency guidance: recent ⇒ favour ~last 2 years; latest ⇒ fast-moving, prioritise newest; any ⇒ include foundational. +Source guidance: broad ⇒ all types (trust-weighted); authoritative ⇒ primary + reputable only; technical-academic ⇒ docs/standards/papers. +Return the structured object.` + +phase('Plan') +const plan = await agent(planPrompt, { label: 'plan:sub-queries', phase: 'Plan', schema: PLAN_SCHEMA }) +const subQueries = (plan && Array.isArray(plan.subQueries) && plan.subQueries.length) + ? plan.subQueries.slice(0, FANOUT) + : [{ angle: 'general', query: brief.goal }] +log(`Planned ${subQueries.length} sub-query angles: ${subQueries.map((q) => q.angle).join(', ')}`) + +// ───────────────────────────── round-stage prompts ────────────────────────── +const RECENCY_HINT = { + recent: 'favour material from roughly the last 2 years', + latest: 'this is a fast-moving topic — prioritise the newest available material', + any: 'include foundational/older material where it is authoritative', +}[brief.recency] +const SOURCE_HINT = { + broad: 'all source types, weighted by trust', + authoritative: 'primary + reputable-secondary only; skip community/forum sources', + 'technical-academic': 'documentation, standards, and peer-reviewed papers', +}[brief.sources] + +const retrieverPrompt = (sq, round) => ` +You are a RESEARCH RETRIEVER. 
Cover ONE angle of the goal: find real web sources, read the best ones, and +extract grounded findings. Do NOT synthesise or write prose — just gather and extract. + +GOAL: ${brief.goal} +YOUR ANGLE: ${sq.angle} +STARTING QUERY (refine freely): ${sq.query} +Recency: ${RECENCY_HINT}. Sources: ${SOURCE_HINT}. Round ${round}. + +Tools: FIRST load firecrawl — ToolSearch with query "select:firecrawl_search,firecrawl_scrape". Use firecrawl_search +to find candidate pages, then firecrawl_scrape to read the 2-4 most relevant. If firecrawl errors or a site is +unsupported (e.g. reddit.com — "site not supported"), FALL BACK to WebSearch (ToolSearch "select:WebSearch") and +work from its snippets. Record any fallback in \`notes\`. + +For every source you actually used: +- url, title, and a trust_tier: + "primary" = official docs/specs/standards, or the primary actor's own statement; + "reputable-secondary"= established press / well-known organisations; + "community" = forums, personal blogs, unverified posts. +- access_date: run \`date -u +%Y-%m-%d\` and use its output (workflow scripts cannot produce dates — you must). +- candidate_image_urls: URLs of charts/infographics on the page worth showing in a report (else []). +- findings: discrete factual claims, each with an evidence span: + prefer kind="quote" — the EXACT verbatim text from the scrape (auditable by construction); + kind="image_region" (value = url + alt/caption) for a chart/figure; + kind="locator" (value = page/section/timestamp + your paraphrase) for paywalled/non-text — explicitly non-verbatim. +Quality over volume — a handful of well-supported findings beats many thin ones. Never fabricate a quote. +Return the structured object.` + +const conflictScoutPrompt = (st) => ` +You are the CONFLICT-SCOUT. DETECT contradictions among the accumulated findings — claims that cannot both hold, +or that materially disagree. 
You do NOT judge materiality (that is the Assessor's job) and you do NOT fetch or +fact-check against the world — you only compare findings against EACH OTHER. Reference findings by their #index below. +For each conflict, HINT whether more retrieval could likely resolve it (likely / unlikely / unknown). + +${corpusText(st)} + +Return conflicts[] (empty array if the findings are mutually consistent).` + +const verifierPrompt = (st) => ` +You are the VERIFIER (this is a DEEP brief). Adversarially try to REFUTE the material findings, reasoning ONLY over +the gathered corpus — other findings, source trust tiers, internal logic. You do NOT fetch anything. Reference by #index. +For each finding you challenge, give a verdict: + "refuted" = the corpus shows it is wrong or unsupported → it will be DROPPED; + "needs-evidence" = cannot be settled without fresh counter-evidence → becomes a GAP for another round; + "stands" = survives scrutiny. +Only list findings you actually challenge; anything unlisted is assumed to stand. + +${corpusText(st)} + +Return refutations[].` + +const assessorPrompt = (st, conflicts, refutations, round) => ` +You are the ASSESSOR — the loop's SINGLE gate. Decide whether another research round is warranted. + +GOAL: ${brief.goal} +Planned facets: ${subQueries.map((q) => q.angle).join(', ')} +This is round ${round} of at most ${MAX_ROUNDS}. Depth intent: ${brief.depth} (a deep brief biases toward more rounds). + +${corpusText(st)} + +Conflicts detected this round: +${JSON.stringify(conflicts, null, 2)} +${RUN_VERIFIER ? `Verifier refutations this round:\n${JSON.stringify(refutations, null, 2)}` : '(no Verifier — quick/standard brief)'} + +Judge each conflict's / refutation's MATERIALITY against the goal + planned facets. A material AND resolvable conflict, +or an unresolved (needs-evidence) refutation, is itself a gap. Green-light (sufficient=true) ONLY when coverage is +sufficient AND no material, resolvable conflict or refutation remains. 
Otherwise set sufficient=false and list gaps[] +that are SPECIFIC and retrievable (they become next round's retrievers). Always propose 2-4 ready-to-use followups[] +for the human checkpoint, even when sufficient. +Return the structured object.` + +// ───────────────────────────── assessor-gated round loop ───────────────────── +phase('Research') +let assessment = null +for (let round = 1; round <= MAX_ROUNDS; round++) { + const queries = round === 1 + ? subQueries + : ((assessment && assessment.gaps) || []).slice(0, FANOUT).map((g, i) => ({ angle: `gap-${i + 1}`, query: g })) + if (!queries.length) { log(`Round ${round}: no gaps to chase — stopping`); break } + + log(`Round ${round}/${MAX_ROUNDS}: ${queries.length} parallel retrievers`) + const outputs = await parallel(queries.map((sq) => () => + agent(retrieverPrompt(sq, round), { label: `retrieve:${sq.angle}`.slice(0, 38), phase: 'Research', schema: RETRIEVER_SCHEMA }))) + + const before = state.findings.length + mergeRound(state, outputs.filter(Boolean)) + state.roundCount++ + const newFindings = state.findings.length - before + log(`Round ${round}: +${newFindings} findings (total ${state.findings.length}) from ${state.sources.length} sources`) + + // Conflict-scout — detection only, every round, after dedup. + const scout = await agent(conflictScoutPrompt(state), { label: 'conflict-scout', phase: 'Research', schema: CONFLICT_SCHEMA }) + const conflicts = (scout && scout.conflicts) || [] + + // Verifier — depth-gated; reasons over the corpus, drops refuted findings. 
+ let refutations = [] + if (RUN_VERIFIER) { + const v = await agent(verifierPrompt(state), { label: 'verifier', phase: 'Research', schema: VERIFIER_SCHEMA }) + refutations = (v && v.refutations) || [] + const refutedIdx = refutations + .filter((r) => r.verdict === 'refuted' && Number.isInteger(r.finding_id)) + .map((r) => r.finding_id) + .sort((x, y) => y - x) // descending so earlier splices don't shift later indices + for (const idx of refutedIdx) if (idx >= 0 && idx < state.findings.length) state.findings.splice(idx, 1) + if (refutedIdx.length) log(`Round ${round}: Verifier dropped ${refutedIdx.length} refuted findings`) + } + + // Assessor — the single gate. + assessment = await agent(assessorPrompt(state, conflicts, refutations, round), { label: 'assessor', phase: 'Research', schema: ASSESSOR_SCHEMA }) + const sufficient = assessment ? assessment.sufficient : true + log(`Round ${round}: Assessor ${sufficient ? 'GREEN — coverage sufficient' : `wants more (${((assessment && assessment.gaps) || []).length} gaps)`}`) + if (sufficient) break + if (newFindings === 0 && round > 1) { log('No new findings this round — stopping to avoid spinning'); break } +} + +const gaps = (assessment && assessment.gaps) || [] +const followups = (assessment && assessment.followups) || [] + +if (!state.findings.length) { + return { error: 'no-findings', message: 'Retrieval produced no usable findings (firecrawl/WebSearch may be unavailable or the query too narrow).', artifactPath: REPORT_DIR, gaps, followups } +} + +// ───────────────────────────── Synthesize + Edit ──────────────────────────── +const SYNTH_SCHEMA = { + type: 'object', + required: ['title', 'answer'], + properties: { + title: { type: 'string', description: 'concise report title derived from the goal' }, + answer: { type: 'string', description: 'markdown with ## section headings and inline [id] citations; audience-NEUTRAL (full, faithful argument)' }, + residual_conflicts: { + type: 'array', + items: { + type: 
'object', + required: ['description'], + properties: { description: { type: 'string' }, source_ids: { type: 'array', items: { type: 'integer' } } }, + }, + }, + }, +} + +const EDITOR_SCHEMA = { + type: 'object', + required: ['title', 'answer', 'sections', 'visuals'], + properties: { + title: { type: 'string' }, + answer: { type: 'string', description: 'edited markdown; each marked visual is a {{VISUAL:N}} token on its own line' }, + sections: { type: 'array', items: { type: 'string' }, description: 'final section heading titles in order (drives the ToC + manifest)' }, + visuals: { + type: 'array', + items: { + type: 'object', + required: ['id', 'type', 'intent', 'spec'], + properties: { + id: { type: 'integer', description: 'matches the {{VISUAL:N}} token in answer' }, + type: { type: 'string', enum: ['diagram', 'chart', 'table', 'image'] }, + intent: { type: 'string', description: 'why this visual earns its place' }, + spec: { type: 'string', description: 'WHAT to show: for diagram/chart/table the actual data/relationships (Composer authors the Mermaid/Chart.js/HTML); for image, the exact source image URL to download' }, + caption: { type: 'string' }, + source_ids: { type: 'array', items: { type: 'integer' }, description: 'provenance; REQUIRED for type=image (the source the image is attributed to)' }, + }, + }, + }, + open_questions: { type: 'array', items: { type: 'string' }, description: '0-4 questions worth flagging to the reader (curated from the assessor gaps)' }, + cut_summary: { type: 'string' }, + }, +} + +const TIER_GUIDE = { + lay: 'LAY reader: define jargon inline, lead with intuition and concrete analogies, and cut expert-only nuance. 
Lean on more visuals.', + informed: 'INFORMED reader: assume general literacy but DEFINE field-specific terms on first use.', + practitioner: 'PRACTITIONER (in the field but junior): assume the basics, but still DEFINE advanced terms and EXPAND abbreviations on first use.', + expert: 'EXPERT: assume the terminology including abbreviations, trim background, and foreground caveats and edge cases. Fewer visuals.', +}[brief.audience.tier] + +const synthPrompt = ` +You are the SYNTHESIZER — the reasoning core. You run ONCE, now that the Assessor has green-lit coverage. Read the FULL +findings + sources below and compose a single coherent, CITED draft answer to the goal. + +GOAL: ${brief.goal} +Write the answer in this language: ${brief.language}. +${isExtending ? 'This EXTENDS an existing report — re-synthesise the WHOLE answer from ALL accumulated findings (holistic, per ADR-0005), not just new material. The findings are the source of truth.' : ''} + +${synthCorpus(state)} + +Rules: +- Reconcile findings where they can be reconciled. Where a contradiction is irreducible, SURFACE it with attribution + (never hide it) and also list it in residual_conflicts. +- Every non-trivial claim cites its SOURCE id(s) inline as [id] or [id][id] (e.g. "throughput doubled [3][7]"). The ONLY + valid citation tokens are the source ids listed above (1..${state.sources.length}). NEVER cite a finding's position or a + number outside that set — a citation always points at a SOURCE. Reuse the [id]s shown beside each piece of evidence. +- Compose AUDIENCE-NEUTRAL: the full, faithful argument with all nuance and caveats. Do NOT trim for a reader — that is + the Editor's job next. +- Structure with ## section headings; lead with a direct answer to the goal, then the support. +Return: title, answer (markdown with [id] citations), residual_conflicts[].` + +// Compact inventory of source images the Editor may pull in — only when one is genuinely irreplaceable. 
+const candidateImages = state.sources + .flatMap((s) => (s.candidate_image_urls || []).map((u) => ({ source_id: s.source_id, url: u }))) + .slice(0, 30) + +const editPrompt = (synth) => ` +You are the EDITOR — the SOLE audience-aware stage and an independent second pair of eyes. Re-cut the draft for concision +and for THIS reader; mark where a visual genuinely earns its place. Never flatten accuracy or drop a citation. + +AUDIENCE: ${TIER_GUIDE}${brief.audience.descriptor ? ` Specifically: ${brief.audience.descriptor}.` : ''} +Keep the answer in this language: ${brief.language}. + +DRAFT ANSWER: +${synth.answer} + +Residual conflicts to keep visible (surface with attribution): ${JSON.stringify((synth && synth.residual_conflicts) || [])} +Assessor's open questions (curate + phrase for this reader): ${JSON.stringify(gaps)} +Source images available (mark type="image" ONLY if one is irreplaceable — otherwise RECONSTRUCT as a chart/diagram/table): ${JSON.stringify(candidateImages)} + +Tasks: +1. Cut redundancy, filler and waffle hard; keep every cited claim and its [id] citations intact.${isExtending ? ' Treat the draft as a wholesale re-cut of the ENTIRE document, not an append.' : ''} +2. Adapt density and jargon to the audience above. +3. Mark visuals that genuinely help (more for a lay reader, fewer for an expert): put a placeholder token {{VISUAL:N}} on + its OWN line where each belongs, and describe it in visuals[] — type, intent, spec, caption, source_ids. The Composer + renders ONLY what you mark here. Prefer RECONSTRUCTING a visual from the findings (a "chart" for quantitative data, a + "diagram" for flows/relationships, a "table" for structured comparisons) over pulling a source "image"; reserve "image" + for a figure that genuinely cannot be reconstructed (always attribute it via source_ids).${diagramsAvailable ? '' : ' NOTE: Mermaid diagrams are UNAVAILABLE this run (no mmdc) — do NOT use type "diagram"; prefer "table" or "chart".'} +4. 
open_questions[]: 0-4 questions worth flagging to the reader (curated from the assessor's). +Return: title, answer (edited markdown with {{VISUAL:N}} tokens), sections[], visuals[], open_questions[], cut_summary.` + +phase('Synthesize') +const synth = await agent(synthPrompt, { label: 'synthesize', phase: 'Synthesize', schema: SYNTH_SCHEMA }) +if (!synth || !synth.answer) { + return { error: 'synthesis-failed', message: 'The Synthesizer produced no answer.', artifactPath: REPORT_DIR, gaps, followups } +} + +phase('Edit') +const edited = await agent(editPrompt(synth), { label: 'edit', phase: 'Edit', schema: EDITOR_SCHEMA }) +const doc = (edited && edited.answer) + ? edited + : { title: synth.title, answer: synth.answer, sections: [], visuals: [], open_questions: gaps.slice(0, 4), cut_summary: 'editor unavailable — rendered the synthesizer draft as-is' } +state.answer = doc.answer + +// ───────────────────────────── Persist (snapshot + state.json) ────────────── +// Done BEFORE rendering and separately from it: state.json is the source of truth and must survive even if the +// large (occasionally flaky) HTML render drops mid-response. The prior report is snapshotted here too — once — so a +// compose retry never snapshots a same-run intermediate. +const PERSIST_SCHEMA = { + type: 'object', + required: ['stateWritten'], + properties: { stateWritten: { type: 'boolean' }, snapshotMade: { type: 'boolean' } }, +} +const persistPrompt = ` +You are the PERSIST step. Using Bash/Write only, do TWO things under ${REPORT_DIR}, then return. Do NOT render HTML. +1. SNAPSHOT — only if a prior output.html exists (freeze it BEFORE the renderer overwrites it): + cd ${REPORT_DIR} + if [ -f output.html ]; then ts=$(date -u +%Y%m%dT%H%M%SZ); sed -e 's#="assets/#="../assets/#g' -e 's#="diagrams/#="../diagrams/#g' output.html > snapshots/output.$ts.html; fi + Use this redirect form, NOT 'sed -i' (its syntax differs macOS vs Linux). 
The snapshot shares the LIVE ../assets/ + while its content stays frozen. Set snapshotMade true iff you wrote a snapshot. +2. STATE — write ${REPORT_DIR}/state.json containing EXACTLY this JSON, verbatim: +${JSON.stringify(state)} +Return { stateWritten, snapshotMade }.` + +phase('Compose') +// state.json is the source of truth; retry hard (writes are cheap and idempotent) and ABORT before rendering if it +// cannot be persisted — keeping the prior consistent report beats rendering a v2 view over a v1 state. +let persisted = null +for (let attempt = 1; attempt <= 3 && !(persisted && persisted.stateWritten); attempt++) { + if (attempt > 1) log(`Persist (state.json) attempt ${attempt} — retrying`) + persisted = await agent(persistPrompt, { label: attempt > 1 ? `persist#${attempt}` : 'persist', phase: 'Compose', schema: PERSIST_SCHEMA }) +} +if (!persisted || !persisted.stateWritten) { + return { error: 'persist-failed', message: 'Could not write state.json after retries — aborted before render to keep the prior report and its state consistent. 
Re-run to retry.', artifactPath: `${REPORT_DIR}/output.html`, gaps, followups } +} + +// ───────────────────────────── Compose (pure render, retryable) ───────────── +const COMPOSER_SCHEMA = { + type: 'object', + required: ['artifactPath', 'manifest'], + properties: { + artifactPath: { type: 'string', description: 'absolute or report-relative path to the written output.html' }, + manifest: { + type: 'object', + required: ['title', 'sections', 'sourceCount', 'roundCount'], + properties: { + title: { type: 'string' }, + sections: { type: 'array', items: { type: 'string' } }, + sourceCount: { type: 'integer' }, + roundCount: { type: 'integer' }, + }, + }, + snapshotMade: { type: 'boolean' }, + diagrams: { type: 'integer' }, + charts: { type: 'integer' }, + tables: { type: 'integer' }, + imagesFetched: { type: 'integer' }, + warnings: { type: 'array', items: { type: 'string' } }, + }, +} + +const sourcesForRender = state.sources.map((s) => ({ + source_id: s.source_id, url: s.url, title: s.title, access_date: s.access_date, trust_tier: s.trust_tier, +})) +const hasChart = (doc.visuals || []).some((v) => v && v.type === 'chart') + +const composerPrompt = ` +You are the COMPOSER — the final stage. Render the editor-approved answer as the HTML Report at ${REPORT_DIR}/output.html, +plus its sidecar diagrams/ and assets/. Do ALL file I/O yourself (Bash / Write / curl); the verbose HTML must NOT be +returned — return only the compact manifest. You author NO CSS and NO bespoke