diff --git a/.github/workflows/provider-dialect-smoke.yml b/.github/workflows/provider-dialect-smoke.yml new file mode 100644 index 00000000..5587018f --- /dev/null +++ b/.github/workflows/provider-dialect-smoke.yml @@ -0,0 +1,68 @@ +name: Provider Dialect Smoke + +on: + workflow_dispatch: + inputs: + providers: + description: 'Comma-separated provider list (anthropic,openai,bedrock,github). Defaults to all.' + required: false + default: '' + model: + description: 'Optional model override applied to every provider.' + required: false + default: '' + schedule: + # Nightly at 03:17 UTC. Off-peak; staggered minute keeps us out of the top-of-hour herd. + - cron: '17 3 * * *' + +permissions: + contents: read + +concurrency: + group: provider-dialect-smoke + cancel-in-progress: false + +jobs: + smoke: + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Setup Node.js + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6 + with: + node-version: 20.x + cache: npm + + - name: Install dependencies + run: npm ci + + - name: Run provider-dialect smoke matrix + env: + # Secret names mirror env-var names; runtime reads these via src/config/env.ts. + # Missing secrets cause that provider to be skipped (warn), not failed. 
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + GITHUB_API_KEY: ${{ secrets.GH_MODELS_API_KEY }} # GitHub Actions rejects secret names that start with GITHUB_, so the PAT is stored under another name + AWS_REGION: ${{ secrets.AWS_REGION }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + PROVIDERS_INPUT: ${{ inputs.providers }} + MODEL_INPUT: ${{ inputs.model }} + run: | + ARGS=() + # Jest aborts on unrecognized CLI flags, so the provider list is translated into --testNamePattern over the describe titles; MODEL_INPUT is only exposed to the spec as an env var (NOTE(review): confirm the spec reads it). + if [ -n "$PROVIDERS_INPUT" ]; then ARGS+=(--testNamePattern="provider-dialect smoke: ($(printf '%s' "$PROVIDERS_INPUT" | tr -d '[:space:]' | tr ',' '|'))"); fi + npm run test:smoke -- "${ARGS[@]}" + + - name: Upload run log + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: smoke-run-log-${{ github.run_id }} + path: evals/logs/smoke_*.json + if-no-files-found: warn + retention-days: 30 diff --git a/.gitignore b/.gitignore index bd1b50e1..eefde8eb 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,9 @@ evals/logs/ evals/datasets/crb/benchmark_data.json evals/datasets/crb/repos/ +# Provider-dialect smoke harness scratch dir (per-run temp qualopsrc.*.json files) +tests/smoke/.tmp/ + # Logs *.log npm-debug.log* diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fbf41ce..101a4f55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,18 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `skipPatterns` config field is now fully functional as a pre-filter: excluded files never reach the review pipeline in file-by-file mode, and agentic tool calls (`read_file`, `grep_files`, `glob_files`) enforce patterns at the handler layer for both OpenAI and Anthropic providers. - Anthropic agentic mode now uses MCP tools for file access instead of SDK built-ins, ensuring `skipPatterns` enforcement is consistent across providers. - `globFiles` tool upgraded from `find`-based to `glob` npm package for proper `**` glob support. 
- -### Changed - Default `skipPatterns` in `ConfigService` changed from infrastructure dirs to empty (`[]`) — patterns are project-specific and should be set per project. qualops's own `.qualopsrc.json` now lists its TS-specific patterns. - Removed `file-exclusions.ts` (dead code — `applyPenalty()` was never called). +- Provider-dialect smoke spec: `npm run test:smoke` runs the 4 AI caller stages migrated in PR #145 (`file-reviewer`, `validation-resolver`, `dedup-resolver`, `root-cause-extract`) against each real provider (`anthropic`, `openai`, `bedrock`, `github`) using a slice fixture as input. Validates that the structured-output dialect path returns a zod-validated response without throwing. Implemented as a Jest spec under `tests/smoke/` with its own `jest.smoke.config.ts` — not picked up by default `npm test` (whose `roots` are limited to `tests/unit/`). Provider config comes from `ConfigService` + the existing `PROVIDER_DEFAULTS` table, not a duplicated table. Providers with missing credentials are `describe.skip()`-ed; providers with malformed credentials fail loudly via the provider class's own `validateApiKey()`. Input is a slice fixture under `evals/datasets/inbox/smoke-sql-injection/`, loosely following TDR 0002. Nightly + manual CI workflow at `.github/workflows/provider-dialect-smoke.yml`. Automates the unchecked manual smoke item from PR #145's test plan; distinct from the deferred per-stage golden-evals item which validates output quality. ## [0.2.3] - 2026-05-28 ### Changed - CRB evals are now self-contained slice directories (`evals/datasets/crb//slice.json` + `repo/`) — no external repo cloning required. Replaced `fetch-crb-dataset.ts` with `check-crb-staleness.ts` which validates local slices against upstream CRB PR URLs. - Neutralize language-specific wording in built-in prompts where the underlying tooling is genuinely language-agnostic, so review output is no longer TypeScript-flavored when qualops is pointed at a non-TS repo. 
- -### Changed - Bump `@anthropic-ai/claude-agent-sdk` from 0.2.139 to 0.3.144. - Bump `@anthropic-ai/claude-agent-sdk-linux-x64` from 0.2.139 to 0.3.144. - Bump `@opentelemetry/sdk-node` from 0.217.0 to 0.218.0. @@ -40,6 +37,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New `BaseAIProvider` consolidating shared token accounting + cost computation while preserving exact per-provider semantics (OpenAI `prompt_tokens` incl. cached, Anthropic/Bedrock `input_tokens` excl. cached; Bedrock log policy unchanged). - New `ProviderCapabilities` descriptor that routes `(provider, model)` to the right structured-output dialect, replacing model-name string sniffing. - Reusable zod schemas in `src/ai/shared/schemas/` for review issues, validation results, dedup indices, search/replace fixes, and root-cause classifications. +- Agentic mode now supports OpenAI and Azure OpenAI providers via `@openai/agents`. Set `provider: "openai"` in your stage config to use the OpenAI adapter; set `OPENAI_BASE_URL` to an Azure endpoint and the correct Azure client is used automatically. +- You can now specify a model and provider together in stage config using `model: { provider: "openai", name: "gpt-4o" }` instead of relying on a separate top-level `provider` field. +- OpenTelemetry observability instrumentation across the full review pipeline (file-by-file, agentic, and eval runs), with auto-detection for Langfuse and generic OTLP backends. All span attributes are sanitized to prevent credential leakage. 
+- Agentic jobs now support a `prompt` field for file-based prompt instructions, combined with the existing inline `systemPrompt` +- GitHub Models AI provider (`provider: "github"`) via `https://models.github.ai/inference` +- Zod-based runtime validation for `.qualopsrc.json` with deprecation warnings for legacy fields +- JSON Schema generated from Zod schemas (`npm run generate:schema`) replacing hand-maintained schema +- Eval `--severity` filter to run only CRB cases with matching golden comment severity +- Report on eval flakiness for Code Review Benchmark `npm run eval:recall-report` with filtering options `-- --severity=critical` +- `init-claude` now scaffolds a validated default config, quality prompt, and supports `--provider` flag +- New `Promote to Stable` workflow (`workflow_dispatch`) for promoting a beta release to a clean stable version +- New `update-beta-ref` and `update-stable-ref` jobs in the npm publish workflow that force-move the `beta` / `stable` lightweight git tags after each release +- `docs/tdr/` folder for Technical Design Records, with TDR 0001 documenting the release process +- New `Releases` page on the docs site explaining the two-tier model to consumers ### Changed - `AIProvider.complete` is now overloaded: `complete(opts & { schema: S })` returns `AIResponse>` (schema-typed); plain `complete(opts)` still returns `AIResponse`. 
@@ -52,6 +63,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Release failure issues now include the failing stages and release kind (beta vs stable) - Normalize `uses: eggai-tech/qualops@v1` examples across the README, docs, and example workflows to `@stable` - Refactor agentic tools: `tools/index.ts` is now a provider-agnostic registry (`createToolSet`); Anthropic and OpenAI SDK wiring stays inside their respective adapters +- AI provider types/factory now include `github` and use stricter provider typing +- Environment config and test setup now include `GITHUB_API_KEY` +- Update documentation to reference the new JSON Schema and provide configuration examples +- Added eval suite ### Removed - Deleted `JsonParser` class and the duplicated private `fixMalformedJson` (last production callers migrated). @@ -70,28 +85,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Release version validation now allows only the prerelease labels the publish workflow recognises (`rc`, `alpha`, `beta`); unrecognised labels like `0.3.0-preview.1` are rejected up-front instead of silently publishing to `latest` - `Promote to Stable` workflow now asserts that `stable_version` equals `beta_version`'s base (e.g., `0.4.0-beta.1` can only promote to `0.4.0`) -### Added -- Agentic mode now supports OpenAI and Azure OpenAI providers via `@openai/agents`. Set `provider: "openai"` in your stage config to use the OpenAI adapter; set `OPENAI_BASE_URL` to an Azure endpoint and the correct Azure client is used automatically. -- You can now specify a model and provider together in stage config using `model: { provider: "openai", name: "gpt-4o" }` instead of relying on a separate top-level `provider` field. -- OpenTelemetry observability instrumentation across the full review pipeline (file-by-file, agentic, and eval runs), with auto-detection for Langfuse and generic OTLP backends. 
All span attributes are sanitized to prevent credential leakage. -- Agentic jobs now support a `prompt` field for file-based prompt instructions, combined with the existing inline `systemPrompt` -- GitHub Models AI provider (`provider: "github"`) via `https://models.github.ai/inference` -- Zod-based runtime validation for `.qualopsrc.json` with deprecation warnings for legacy fields -- JSON Schema generated from Zod schemas (`npm run generate:schema`) replacing hand-maintained schema -- Eval `--severity` filter to run only CRB cases with matching golden comment severity -- Report on eval flakiness for Code Review Benchmark `npm run eval:recall-report` with filtering options `-- --severity=critical` -- `init-claude` now scaffolds a validated default config, quality prompt, and supports `--provider` flag -- New `Promote to Stable` workflow (`workflow_dispatch`) for promoting a beta release to a clean stable version -- New `update-beta-ref` and `update-stable-ref` jobs in the npm publish workflow that force-move the `beta` / `stable` lightweight git tags after each release -- `docs/tdr/` folder for Technical Design Records, with TDR 0001 documenting the release process -- New `Releases` page on the docs site explaining the two-tier model to consumers - -### Changed -- AI provider types/factory now include `github` and use stricter provider typing -- Environment config and test setup now include `GITHUB_API_KEY` -- Update documentation to reference the new JSON Schema and provide configuration examples -- Added eval suite - ## [0.2.1] - 2026-03-14 ### Changed diff --git a/README.md b/README.md index 2411a703..1cb5220b 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,24 @@ Reference in `.qualopsrc.json`: } ``` +## Testing + +### Unit tests + +```bash +npm test +``` + +### Provider-dialect smoke tests + +Real-API tests that exercise the 4 AI caller stages (`file-reviewer`, `validation-resolver`, `dedup-resolver`, `root-cause-extract`) against each supported 
provider. Validates that the structured-output dialect path returns a zod-validated response without throwing. Providers without credentials are skipped automatically. + +```bash +npm run test:smoke +``` + +See [`tests/smoke/README.md`](./tests/smoke/README.md) for details on env vars and CI setup. + ## License MIT diff --git a/evals/datasets/inbox/smoke-sql-injection/repo/src/api/users.ts b/evals/datasets/inbox/smoke-sql-injection/repo/src/api/users.ts new file mode 100644 index 00000000..39781135 --- /dev/null +++ b/evals/datasets/inbox/smoke-sql-injection/repo/src/api/users.ts @@ -0,0 +1,8 @@ +import { Request, Response } from 'express'; +import { db } from '../db'; + +export async function getUser(req: Request, res: Response) { + const userId = req.params.id; + const result = await db.query(`SELECT * FROM users WHERE id = '${userId}'`); + res.json(result.rows[0]); +} diff --git a/evals/datasets/inbox/smoke-sql-injection/slice.json b/evals/datasets/inbox/smoke-sql-injection/slice.json new file mode 100644 index 00000000..48cad7cf --- /dev/null +++ b/evals/datasets/inbox/smoke-sql-injection/slice.json @@ -0,0 +1,20 @@ +{ + "id": "smoke-sql-injection", + "language": "typescript", + "filePath": "src/api/users.ts", + "diff": "@@ -10,6 +10,12 @@\n import { db } from '../db';\n \n+export async function getUser(req: Request, res: Response) {\n+ const userId = req.params.id;\n+ const result = await db.query(`SELECT * FROM users WHERE id = '${userId}'`);\n+ res.json(result.rows[0]);\n+}\n+", + "purpose": "smoke", + "capturedAt": "2026-05-21", + "capturedBy": "provider-dialect-smoke-harness", + "note": "Synthetic input for the provider-dialect smoke harness. Not a captured real-world miss. 
Loosely follows TDR 0002 slice layout (slice.json + repo/ tree) so future smoke fixtures can be migrated to the full inbox eval format if the slice harness lands.", + "expected": [ + { + "file": "src/api/users.ts", + "line": 6, + "lineEnd": 6, + "type": "security", + "severity": "critical", + "description": "SQL injection via string interpolation in query" + } + ] +} diff --git a/jest.smoke.config.ts b/jest.smoke.config.ts new file mode 100644 index 00000000..5d784909 --- /dev/null +++ b/jest.smoke.config.ts @@ -0,0 +1,28 @@ +// Jest config for the real-API smoke lane (`npm run test:smoke`); collects only specs under tests/smoke. +export default { + displayName: 'qualops-smoke', + preset: './jest.preset.js', + testEnvironment: 'node', + setupFilesAfterEnv: ['<rootDir>/tests/setup/smoke.setup.ts'], + roots: ['<rootDir>/tests/smoke'], + globals: {}, + testMatch: ['<rootDir>/tests/smoke/**/*.spec.ts'], + transform: { + '^.+\\.(ts|mjs|js)$': [ + 'ts-jest', + { + tsconfig: '<rootDir>/tsconfig.spec.json', + useESM: true, + }, + ], + }, + moduleFileExtensions: ['ts', 'js', 'mjs'], + extensionsToTreatAsEsm: ['.ts'], + moduleNameMapper: { + '^@/(.*)$': '<rootDir>/src/$1', + '^@tests/(.*)$': '<rootDir>/tests/$1', + '^(\\.{1,2}/.*)\\.js$': '$1', + }, + transformIgnorePatterns: ['node_modules/(?!.*\\.mjs$)'], + maxWorkers: 1, +}; diff --git a/package.json b/package.json index d7414ebb..c1d59ef9 100644 --- a/package.json +++ b/package.json @@ -77,6 +77,7 @@ "eval:upload:qualops": "npx tsx evals/src/upload-datasets.ts --source=qualops", "eval:upload:crb:all": "npx tsx evals/src/upload-datasets.ts --source=crb", "eval:recall-report": "npx tsx evals/src/recall-report.ts", + "test:smoke": "jest --config jest.smoke.config.ts", "generate:schema": "ts-node --transpile-only --project tsconfig.lib.json scripts/generate-config-schema.ts" }, "dependencies": { diff --git a/tests/setup/smoke.setup.ts b/tests/setup/smoke.setup.ts new file mode 100644 index 00000000..d84f27e3 --- /dev/null +++ b/tests/setup/smoke.setup.ts @@ -0,0 +1,9 @@ +import { config as dotenvConfig } from 'dotenv'; + +// Load .env before any module that reads process.env (e.g. 
envConfig singleton). +// This must happen in setupFilesAfterEnv, which runs before the spec is imported. +dotenvConfig(); + +// Per-test timeout for real-API calls. Long enough to absorb provider retries on +// transient 5xx/429s without parking the runner indefinitely. +jest.setTimeout(120_000); diff --git a/tests/smoke/README.md b/tests/smoke/README.md new file mode 100644 index 00000000..7330bc54 --- /dev/null +++ b/tests/smoke/README.md @@ -0,0 +1,71 @@ +# Provider-dialect smoke + +A real-API Jest spec for the 4 AI caller stages migrated in PR #145 +(`file-reviewer`, `validation-resolver`, `dedup-resolver`, +`root-cause-extract`). Runs each stage through each real provider +(`anthropic`, `openai`, `bedrock`, `github`) using a slice fixture as input. +Validates plumbing only — the structured-output dialect path returns a +zod-validated response without throwing. Output quality is out of scope and +covered by the deferred per-stage golden-evals follow-up. + +This spec is **not** part of the default `npm test` run. The base +`jest.config.js` constrains `roots` to `tests/unit/`, so this file is +unreachable from `npm test`. It runs under its own config, +`jest.smoke.config.ts`, via `npm run test:smoke`. + +## Architecture + +- **Test runner**: Jest (own config; not picked up by unit or integration lanes). +- **Provider configuration**: per-provider temp `.qualopsrc.json` written to + `tests/smoke/.tmp/` and loaded via `ConfigService.setConfigPath()`. Pricing + + model defaults come from `PROVIDER_DEFAULTS` in `src/config/config.ts` + (with one inline default for GitHub Models, which is not in that table). + Stage classes are obtained via `AIFactory.createForStage('review')` — same + path that production code uses; no direct provider instantiation. +- **Input**: slice fixture at + `evals/datasets/inbox/smoke-sql-injection/` (slice.json + repo/ tree), + loosely following [TDR 0002](../../docs/tdr/0002-evals-from-real-prs.md). 
+- **Skip vs fail**: a provider whose credential env var is missing is marked + `describe.skip` at module load — the entire 4-stage block is statically + skipped in the test report. A provider with present-but-malformed + credentials is attempted; the provider class's own `validateApiKey()` / + `validateConfiguration()` throws, surfacing as a failed test with a real + error. + +## Run + +```bash +npm run test:smoke +``` + +The CI workflow runs the same command and uploads any +`evals/logs/smoke_*.json` files it finds as the run-log artifact. + +## Env vars + +| Provider | Env vars | +|---|---| +| `anthropic` | `ANTHROPIC_API_KEY` | +| `openai` | `OPENAI_API_KEY` (+ optional `OPENAI_BASE_URL` for Azure / proxies) | +| `bedrock` | `AWS_REGION` + `AWS_ACCESS_KEY_ID` + `AWS_SECRET_ACCESS_KEY` | +| `github` | `GITHUB_API_KEY` (a `ghp_…`, `github_pat_…`, etc. PAT — **not** `GITHUB_TOKEN`) | + +In CI, every entry above is populated from a GitHub Actions repo secret, normally of the +same name (e.g. `secrets.ANTHROPIC_API_KEY`). `GITHUB_API_KEY` is the exception: Actions rejects secret names that start with `GITHUB_`, so that PAT must be stored under a different secret name and mapped to the `GITHUB_API_KEY` env var in the workflow. The `ANTHROPIC_API_KEY` secret +already exists in the repo (used by `ci.yml`); the others must be added +before their providers contribute non-skip coverage in the nightly run. + +## CI + +`.github/workflows/provider-dialect-smoke.yml` — manual `workflow_dispatch` +and nightly cron at 03:17 UTC. Gated on API-key repository secrets. **Not** +part of PR-blocking CI. + +## Notes on `root-cause-extract` + +The stage swallows provider errors internally and returns synthetic +`{rootCause: 'other', confidence: 0}` classifications for every input issue. +A naïve "did the function throw" assertion would always pass even when the +API call silently failed. The spec cross-checks +`AIFactory.createForStage('review').getTokenStats()` and the classification +distribution to detect this case and surface it as a failure. 
diff --git a/tests/smoke/provider-dialect-smoke.spec.ts b/tests/smoke/provider-dialect-smoke.spec.ts new file mode 100644 index 00000000..07aa19b0 --- /dev/null +++ b/tests/smoke/provider-dialect-smoke.spec.ts @@ -0,0 +1,322 @@ +/** + * Provider-dialect smoke spec. + * + * Automates the unchecked manual smoke item from PR #145's test plan: exercises + * each of the 4 AI caller stages migrated to native structured output + * (file-reviewer, validation-resolver, dedup-resolver, root-cause-extract) + * against each real provider (anthropic, openai, bedrock, github) using a + * slice fixture as input. Validates plumbing only — the provider-specific + * dialect path returns a zod-validated response without throwing. + * + * Output quality is out of scope and covered by the deferred per-stage + * golden-evals follow-up. + * + * NOT part of the default Jest run. The base `jest.config.js` constrains + * `roots` to `tests/unit/`, so this file is unreachable from `npm test`. + * Run via `npm run test:smoke`, which uses `jest.smoke.config.ts`. + * + * A provider is **skipped** when its credential env var is missing; a + * provider with present-but-malformed credentials is **attempted** so + * misconfigured CI secrets surface as real failures via the provider class's + * own validateApiKey() / validateConfiguration(). 
+ */ + +import { existsSync } from 'node:fs'; +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import path from 'node:path'; + +import { AIFactory, clearGlobalAIProvider } from '@/ai/providers'; +import type { AIProvider } from '@/ai/providers/provider'; +import { ConfigService, PROVIDER_DEFAULTS } from '@/config/config'; +import { envConfig } from '@/config/env'; +import { sessionContext, setCurrentSession } from '@/shared/runtime/session-context'; +import type { FileInfo, PipelineJob, ReviewConfig, ReviewIssue } from '@/shared/types'; +import { DeduplicationResolver } from '@/stages/review/processors/dedup-resolver'; +import { FileReviewer } from '@/stages/review/processors/file-reviewer'; +import { ValidationResolver } from '@/stages/review/processors/validation-resolver'; +import { extractRootCauses } from '@/stages/root-cause-extract'; + +// --------------------------------------------------------------------------- +// Constants & types +// --------------------------------------------------------------------------- + +const PROVIDERS = ['anthropic', 'openai', 'bedrock', 'github'] as const; +type ProviderName = (typeof PROVIDERS)[number]; + +// GitHub Models is not in src/config/config.ts PROVIDER_DEFAULTS because it is +// not a default-fallback provider for zero-config mode. A smoke-specific default +// is fine; AIFactory still wires it through OpenAICompatibleProvider correctly. 
+const GITHUB_DEFAULT = { model: 'gpt-4o-mini', inputPerMillion: 0, outputPerMillion: 0 };
+
+const PROJECT_ROOT = path.resolve(__dirname, '..', '..');
+const SLICE_DIR = path.join(PROJECT_ROOT, 'evals', 'datasets', 'inbox', 'smoke-sql-injection');
+const TMP_DIR = path.join(PROJECT_ROOT, 'tests', 'smoke', '.tmp');
+const PROMPTS_DIR = path.join(PROJECT_ROOT, '.qualops', 'prompts');
+const SESSION_ROOT = path.join(PROJECT_ROOT, '.qualops', 'reports', `.smoke-${process.pid}`);
+const SMOKE_VALIDATION_PROMPT = '_smoke-validation.md';
+const SMOKE_DEDUP_PROMPT = '_smoke-dedup.md';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Reports whether every credential value the given provider needs is present.
+ * Only presence is checked; malformed values are left for the provider class
+ * to reject so they surface as failures rather than skips.
+ */
+function hasCredentials(provider: ProviderName): boolean {
+  switch (provider) {
+    case 'anthropic':
+      return !!envConfig.get('anthropicApiKey');
+    case 'openai':
+      return !!envConfig.get('openaiApiKey');
+    case 'bedrock':
+      return !!(
+        envConfig.get('awsRegion') &&
+        envConfig.get('awsAccessKeyId') &&
+        envConfig.get('awsSecretAccessKey')
+      );
+    case 'github':
+      return !!envConfig.get('githubApiKey');
+  }
+}
+
+/** Returns model + pricing defaults: the local GitHub entry, else PROVIDER_DEFAULTS. */
+function defaultsFor(provider: ProviderName) {
+  return provider === 'github' ? GITHUB_DEFAULT : PROVIDER_DEFAULTS[provider];
+}
+
+/** The parts of the slice fixture the spec feeds into the stages. */
+interface SliceFixture {
+  filePath: string;
+  content: string;
+  language: string;
+}
+
+/**
+ * Reads slice.json from SLICE_DIR and the source file it points at under repo/.
+ *
+ * @returns the fixture's relative file path, that file's content, and its language.
+ */
+async function loadSlice(): Promise<SliceFixture> {
+  const slice = JSON.parse(await readFile(path.join(SLICE_DIR, 'slice.json'), 'utf-8'));
+  const filePath = slice.filePath as string;
+  const content = await readFile(path.join(SLICE_DIR, 'repo', filePath), 'utf-8');
+  return { filePath, content, language: slice.language };
+}
+
+/**
+ * Writes a per-provider config file under tests/smoke/.tmp/ that points
+ * ai.reviewStage at the given provider.
+ *
+ * @returns the config path relative to the project root.
+ */
+async function writeProviderConfig(provider: ProviderName): Promise<string> {
+  const d = defaultsFor(provider);
+  // The CI workflow exports the optional model override as MODEL_INPUT. It is an
+  // empty string when the input is left blank, hence `||` rather than `??`.
+  const modelOverride = process.env.MODEL_INPUT?.trim();
+  const cfg = {
+    ai: {
+      reviewStage: {
+        provider,
+        model: modelOverride || d.model,
+        inputPerMillion: d.inputPerMillion,
+        outputPerMillion: d.outputPerMillion,
+        temperature: 0,
+      },
+    },
+    review: {
+      // root-cause-extract reads only ai.reviewStage; the pipeline is required by
+      // the config schema but otherwise unused here. Agentic mode has optional
+      // `passes` — minimal schema-valid shape.
+      pipeline: [{ name: 'smoke', enabled: true, mode: 'agentic' }],
+    },
+  };
+  const fileRel = path.join('tests', 'smoke', '.tmp', `qualopsrc.${provider}.json`);
+  const fileAbs = path.join(PROJECT_ROOT, fileRel);
+  await mkdir(path.dirname(fileAbs), { recursive: true });
+  await writeFile(fileAbs, JSON.stringify(cfg, null, 2));
+  return fileRel;
+}
+
+/**
+ * Writes the two smoke prompt files into PROMPTS_DIR and resolves the review
+ * system prompt (the bundled quality.md when it exists, otherwise an inline fallback).
+ *
+ * @returns the system prompt plus a cleanup callback that removes both prompt files.
+ */
+async function setupPrompts(): Promise<{ systemPrompt: string; cleanup: () => Promise<void> }> {
+  await mkdir(PROMPTS_DIR, { recursive: true });
+
+  const validationPath = path.join(PROMPTS_DIR, SMOKE_VALIDATION_PROMPT);
+  const dedupPath = path.join(PROMPTS_DIR, SMOKE_DEDUP_PROMPT);
+
+  const validationPrompt =
+    'You are validating code review findings. For each issue below, decide if it is a true positive. ' +
+    'Return a JSON array. Each item has: index (number), is_false_positive (boolean), confidence (1-10), ' +
+    'severity (critical|high|medium|low), reasoning (short string).\n';
+  const dedupPrompt =
+    'You are deduplicating code review findings for a single file. ' +
+    'Return the JSON array of indices to KEEP after removing duplicates.\n';
+
+  await writeFile(validationPath, validationPrompt);
+  await writeFile(dedupPath, dedupPrompt);
+
+  const bundledSystem = path.join(PROJECT_ROOT, 'src', 'config', 'prompts', 'review', 'quality.md');
+  const systemPrompt = existsSync(bundledSystem)
+    ? await readFile(bundledSystem, 'utf-8')
+    : 'You are a code reviewer. Return findings as a JSON array per the provided schema.';
+
+  return {
+    systemPrompt,
+    cleanup: async () => {
+      await rm(validationPath, { force: true });
+      await rm(dedupPath, { force: true });
+    },
+  };
+}
+
+/** Builds the pipeline job with validation and dedup enabled and wired to the smoke prompts. */
+function buildPipelineJob(): PipelineJob {
+  return {
+    name: 'smoke',
+    enabled: true,
+    mode: 'agentic',
+    validation: { enabled: true, minConfidence: 0, prompt: SMOKE_VALIDATION_PROMPT },
+    deduplication: { enabled: true, prompt: SMOKE_DEDUP_PROMPT },
+  };
+}
+
+/** Builds a review config holding a single smoke job and no confidence floor. */
+function buildReviewConfig(): ReviewConfig {
+  return { minConfidence: 0, pipeline: [buildPipelineJob()] };
+}
+
+/**
+ * Returns two near-duplicate issues for the given file, so the resolvers always
+ * have input even when file-reviewer finds nothing.
+ */
+function seedIssues(filePath: string): ReviewIssue[] {
+  const now = Date.now();
+  return [
+    {
+      id: `${filePath}-L6-${now}-a`,
+      file: filePath,
+      type: 'security',
+      severity: 'critical',
+      description: 'Smoke seed: potential SQL injection via string interpolation',
+      location: '6',
+      reasoning: 'String interpolation in SQL query allows injection.',
+      suggestion: 'Use parameterized queries.',
+      context: 'db.query(`SELECT ... ${userId}`)',
+      confidence: 9,
+      knowledge_source: 'smoke',
+      priority: 1,
+      estimatedEffort: 'low',
+      tags: ['security', 'critical', 'ts'],
+    },
+    {
+      id: `${filePath}-L6-${now}-b`,
+      file: filePath,
+      type: 'security',
+      severity: 'high',
+      description: 'Smoke seed: same SQL injection (duplicate of A)',
+      location: '6',
+      reasoning: 'Restated finding for dedup exercise.',
+      suggestion: 'Parameterize.',
+      context: 'db.query template literal',
+      confidence: 8,
+      knowledge_source: 'smoke',
+      priority: 2,
+      estimatedEffort: 'low',
+      tags: ['security', 'high', 'ts'],
+    },
+  ];
+}
+
+/** Writes one `<n>-smoke-seed.md` file per issue into issuesDir, creating the directory first. */
+async function writeSeedIssueMarkdown(issues: ReviewIssue[], issuesDir: string): Promise<void> {
+  await mkdir(issuesDir, { recursive: true });
+  for (const [idx, issue] of issues.entries()) {
+    const md = `# ${issue.description}
+
+**Severity**: ${issue.severity}
+**Category**: ${issue.type}
+
+## Reasoning
+${issue.reasoning ?? ''}
+`;
+    await writeFile(path.join(issuesDir, `${idx + 1}-smoke-seed.md`), md);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Shared setup
+// ---------------------------------------------------------------------------
+
+let slice: SliceFixture;
+let file: FileInfo;
+let systemPrompt: string;
+// Stays undefined when beforeAll fails before setupPrompts() resolves.
+let cleanupPrompts: (() => Promise<void>) | undefined;
+
+beforeAll(async () => {
+  await mkdir(SESSION_ROOT, { recursive: true });
+  slice = await loadSlice();
+  file = { path: slice.filePath, content: slice.content };
+  const setup = await setupPrompts();
+  cleanupPrompts = setup.cleanup;
+  systemPrompt = setup.systemPrompt;
+});
+
+afterAll(async () => {
+  try {
+    // Guarded so a failed beforeAll is reported as itself, not as a TypeError here.
+    await cleanupPrompts?.();
+  } finally {
+    // Scratch dirs and singletons are reset even if prompt cleanup rejects.
+    await rm(TMP_DIR, { recursive: true, force: true });
+    await rm(SESSION_ROOT, { recursive: true, force: true });
+    sessionContext.reset();
+    AIFactory.clear();
+    clearGlobalAIProvider();
+  }
+});
+
+// ---------------------------------------------------------------------------
+// Matrix: 4 providers × 4 stages
+// 
---------------------------------------------------------------------------
+
+const reviewConfig = buildReviewConfig();
+const job = buildPipelineJob();
+
+// One describe block per provider. The decision to skip is made at module load,
+// so a provider without credentials shows up as skipped rather than failed.
+for (const provider of PROVIDERS) {
+  const describeOrSkip = hasCredentials(provider) ? describe : describe.skip;
+
+  describeOrSkip(`provider-dialect smoke: ${provider}`, () => {
+    let aiProvider: AIProvider;
+    let observedIssues: ReviewIssue[] = [];
+
+    beforeAll(async () => {
+      // Point ConfigService at this provider's config and drop any provider
+      // instance left over from the previous describe block.
+      const configPath = await writeProviderConfig(provider);
+      ConfigService.setConfigPath(configPath);
+      AIFactory.clear();
+      clearGlobalAIProvider();
+      aiProvider = await AIFactory.createForStage('review');
+    });
+
+    it('file-reviewer: structured response validates against ReviewIssuesSchema', async () => {
+      const reviewer = new FileReviewer(aiProvider, systemPrompt, 'smoke');
+      observedIssues = await reviewer.reviewFile(file);
+      expect(Array.isArray(observedIssues)).toBe(true);
+    });
+
+    it('validation-resolver: structured response validates against ValidationResultsSchema', async () => {
+      // Seed inputs ensure the resolver actually invokes the provider even if
+      // file-reviewer returned an empty array.
+      const input = [...observedIssues, ...seedIssues(slice.filePath)];
+      const resolver = new ValidationResolver(reviewConfig, aiProvider);
+      const result = await resolver.validate(input, job);
+      expect(Array.isArray(result)).toBe(true);
+    });
+
+    it('dedup-resolver: structured response validates against DedupIndicesSchema', async () => {
+      // Dedup short-circuits on input.length <= 1, so we need at least 2 issues.
+      const input = seedIssues(slice.filePath);
+      const resolver = new DeduplicationResolver(reviewConfig, aiProvider);
+      const result = await resolver.deduplicate(input, job);
+      expect(Array.isArray(result)).toBe(true);
+    });
+
+    it('root-cause-extract: structured response validates against RootCauseClassificationsSchema', async () => {
+      // root-cause-extract reads from session-context paths and uses
+      // AIFactory.createForStage('review') internally — the per-provider
+      // ConfigService.setConfigPath() in this describe's beforeAll already
+      // points the factory at the current provider. The stage swallows
+      // provider errors and returns synthetic `{rootCause: 'other',
+      // confidence: 0}` per input, so we cross-check token stats and the
+      // classification distribution to detect silent failures.
+      setCurrentSession('smoke-session', SESSION_ROOT);
+      const seeded = seedIssues(slice.filePath);
+      await writeSeedIssueMarkdown(seeded, path.join(SESSION_ROOT, 'issues'));
+
+      // Snapshot the counters first. If the factory hands back the provider the
+      // earlier tests already used, absolute values are non-zero and would hide
+      // a silent failure. Primitives are copied in case the stats object is live.
+      const statsBefore = (await AIFactory.createForStage('review')).getTokenStats();
+      const invocationsBefore = statsBefore.invocationCount;
+      const outputTokensBefore = statsBefore.totalOutputTokens;
+
+      const metadata = await extractRootCauses();
+
+      const stats = (await AIFactory.createForStage('review')).getTokenStats();
+      expect(stats.invocationCount).toBeGreaterThan(invocationsBefore);
+      expect(stats.totalOutputTokens).toBeGreaterThan(outputTokensBefore);
+
+      const classifications = Object.values(metadata.classifications);
+      expect(classifications.length).toBeGreaterThan(0);
+      const allFallback = classifications.every(
+        (c) => c.rootCause === 'other' && c.confidence === 0,
+      );
+      expect(allFallback).toBe(false);
+    });
+  });
+}