diff --git a/packages/cli/package.json b/packages/cli/package.json index 7b014588..c3ccf080 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -71,6 +71,8 @@ "tsyringe": "^4.10.0" }, "devDependencies": { + "@night-watch/core": "*", + "@night-watch/server": "*", "@types/blessed": "^0.1.27", "@types/node": "^22.0.0", "esbuild": "^0.25.0", diff --git a/packages/cli/src/__tests__/commands/run.test.ts b/packages/cli/src/__tests__/commands/run.test.ts index 138172a2..69a8f29c 100644 --- a/packages/cli/src/__tests__/commands/run.test.ts +++ b/packages/cli/src/__tests__/commands/run.test.ts @@ -31,11 +31,17 @@ import { scanPrdDirectory, getRateLimitFallbackTelegramWebhooks, isRateLimitFallbackTriggered, + recordRunSessionOutcome, resolveRunNotificationEvent, shouldAttemptCrossProjectFallback, } from '@/cli/commands/run.js'; import { applyScheduleOffset, buildCronPathPrefix } from '@/cli/commands/install.js'; import { INightWatchConfig } from '@night-watch/core/types.js'; +import { closeDb } from '@night-watch/core/storage/sqlite/client.js'; +import { + getRepositories, + resetRepositories, +} from '@night-watch/core/storage/repositories/index.js'; import { sendNotifications } from '@night-watch/core/utils/notify.js'; // Helper to create a valid config without budget fields @@ -62,6 +68,7 @@ function createTestConfig(overrides: Partial = {}): INightWat describe('run command', () => { let tempDir: string; let originalEnv: NodeJS.ProcessEnv; + let originalNightWatchHome: string | undefined; beforeEach(() => { tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'night-watch-test-')); @@ -69,6 +76,7 @@ describe('run command', () => { // Save original environment originalEnv = { ...process.env }; + originalNightWatchHome = process.env.NIGHT_WATCH_HOME; // Clear NW_* environment variables for (const key of Object.keys(process.env)) { @@ -81,8 +89,16 @@ describe('run command', () => { }); afterEach(() => { + closeDb(); + resetRepositories(); fs.rmSync(tempDir, { 
recursive: true, force: true }); + if (originalNightWatchHome === undefined) { + delete process.env.NIGHT_WATCH_HOME; + } else { + process.env.NIGHT_WATCH_HOME = originalNightWatchHome; + } + // Restore original environment for (const key of Object.keys(process.env)) { if (key.startsWith('NW_')) { @@ -480,6 +496,49 @@ describe('run command', () => { }); }); + describe('outcome recording', () => { + it('should record executor outcome after script exits', () => { + process.env.NIGHT_WATCH_HOME = path.join(tempDir, '.night-watch-home'); + closeDb(); + resetRepositories(); + + const config = createTestConfig(); + const startedAt = 1_700_000_000_000; + const finishedAt = 1_700_000_003_000; + + recordRunSessionOutcome({ + projectDir: tempDir, + config, + envVars: { + NW_PROVIDER_KEY: 'claude-native', + NW_PROVIDER_CMD: 'claude', + NW_PROVIDER_LABEL: 'Claude', + }, + startedAt, + finishedAt, + exitCode: 1, + stderr: "packages/core/src/index.ts:1:1 - error TS2305: Module has no exported member 'x'.", + scriptResult: { + status: 'failure', + data: { prd: '97-feedback.md', branch: 'night-watch/nw-97' }, + }, + }); + + const outcomes = getRepositories().sessionOutcomes.queryOutcomes({ + projectPath: tempDir, + jobType: 'executor', + }); + + expect(outcomes).toHaveLength(1); + expect(outcomes[0].providerKey).toBe('claude-native'); + expect(outcomes[0].durationSeconds).toBe(3); + expect(outcomes[0].outcome).toBe('failure'); + expect(outcomes[0].failureCategory).toBe('typescript'); + expect(outcomes[0].prdFile).toBe('97-feedback.md'); + expect(outcomes[0].branchName).toBe('night-watch/nw-97'); + }); + }); + describe('applyScheduleOffset', () => { it('should replace minute field with offset', () => { expect(applyScheduleOffset('0 0-21 * * *', 15)).toBe('15 0-21 * * *'); diff --git a/packages/cli/src/__tests__/scripts/core-flow-smoke.test.ts b/packages/cli/src/__tests__/scripts/core-flow-smoke.test.ts index a5e2e013..72d8b002 100644 --- 
a/packages/cli/src/__tests__/scripts/core-flow-smoke.test.ts +++ b/packages/cli/src/__tests__/scripts/core-flow-smoke.test.ts @@ -105,6 +105,13 @@ function commitAll(projectDir: string, message: string): void { }); } +function writeFakeClaude(fakeBin: string): void { + fs.writeFileSync(path.join(fakeBin, 'claude'), '#!/usr/bin/env bash\nexit 0\n', { + encoding: 'utf-8', + mode: 0o755, + }); +} + afterEach(() => { for (const dir of tempDirs) { fs.rmSync(dir, { recursive: true, force: true }); @@ -1932,6 +1939,7 @@ describe('core flow smoke tests (bash scripts)', () => { fs.mkdirSync(path.join(projectDir, 'logs'), { recursive: true }); const fakeBin = mkTempDir('nw-smoke-reviewer-score-threshold-bin-'); + writeFakeClaude(fakeBin); fs.writeFileSync( path.join(fakeBin, 'gh'), @@ -2070,6 +2078,7 @@ describe('core flow smoke tests (bash scripts)', () => { fs.mkdirSync(path.join(projectDir, 'logs'), { recursive: true }); const fakeBin = mkTempDir('nw-smoke-reviewer-needs-human-review-bin-'); + writeFakeClaude(fakeBin); fs.writeFileSync( path.join(fakeBin, 'gh'), @@ -2339,6 +2348,7 @@ describe('core flow smoke tests (bash scripts)', () => { fs.mkdirSync(path.join(projectDir, 'logs'), { recursive: true }); const fakeBin = mkTempDir('nw-smoke-reviewer-max-prs-per-run-bin-'); + writeFakeClaude(fakeBin); fs.writeFileSync( path.join(fakeBin, 'gh'), diff --git a/packages/cli/src/commands/analytics.ts b/packages/cli/src/commands/analytics.ts index e200bad9..43d523b3 100644 --- a/packages/cli/src/commands/analytics.ts +++ b/packages/cli/src/commands/analytics.ts @@ -14,6 +14,7 @@ import { runAnalytics, } from '@night-watch/core'; import { maybeApplyCronSchedulingDelay } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; export interface IAnalyticsOptions { dryRun: boolean; @@ -58,6 +59,27 @@ export function analyticsCommand(program: Command): void { const apiKey = config.providerEnv?.AMPLITUDE_API_KEY; const secretKey = 
config.providerEnv?.AMPLITUDE_SECRET_KEY; if (!apiKey || !secretKey) { + const now = Date.now(); + if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode: 1, + finishedAt: now, + jobType: 'analytics', + metadata: { + missingAmplitudeCredentials: true, + }, + projectDir, + providerKey: resolveJobProvider(config, 'analytics'), + startedAt: now, + stderr: + 'AMPLITUDE_API_KEY and AMPLITUDE_SECRET_KEY must be set in providerEnv to run analytics.', + }); + } catch { + // Outcome persistence must not change command exit behavior. + } + } info( 'AMPLITUDE_API_KEY and AMPLITUDE_SECRET_KEY must be set in providerEnv to run analytics.', ); @@ -84,13 +106,49 @@ export function analyticsCommand(program: Command): void { const spinner = createSpinner('Running analytics job...'); spinner.start(); + const startedAt = Date.now(); try { await maybeApplyCronSchedulingDelay(config, 'analytics', projectDir); const result = await runAnalytics(config, projectDir); + try { + recordJobOutcome({ + config, + exitCode: 0, + finishedAt: Date.now(), + jobType: 'analytics', + metadata: { + lookbackDays: config.analytics.lookbackDays, + summary: result.summary, + }, + projectDir, + providerKey: resolveJobProvider(config, 'analytics'), + startedAt, + stdout: result.summary, + }); + } catch { + // Outcome persistence must not change command exit behavior. + } spinner.succeed(`Analytics complete — ${result.summary}`); } catch (err) { + try { + recordJobOutcome({ + config, + exitCode: 1, + finishedAt: Date.now(), + jobType: 'analytics', + metadata: { + lookbackDays: config.analytics.lookbackDays, + }, + projectDir, + providerKey: resolveJobProvider(config, 'analytics'), + startedAt, + stderr: err instanceof Error ? err.message : String(err), + }); + } catch { + // Outcome persistence must not change command exit behavior. + } spinner.fail(`Analytics failed: ${err instanceof Error ? 
err.message : String(err)}`); process.exit(1); } diff --git a/packages/cli/src/commands/audit.ts b/packages/cli/src/commands/audit.ts index f0d6b9f7..2a7e84c6 100644 --- a/packages/cli/src/commands/audit.ts +++ b/packages/cli/src/commands/audit.ts @@ -26,6 +26,7 @@ import { getTelegramStatusWebhooks, maybeApplyCronSchedulingDelay, } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; export interface IAuditOptions { dryRun: boolean; @@ -130,6 +131,7 @@ export function auditCommand(program: Command): void { const spinner = createSpinner('Running code audit...'); spinner.start(); + const startedAt = Date.now(); try { await maybeApplyCronSchedulingDelay(config, 'audit', projectDir); @@ -138,8 +140,32 @@ export function auditCommand(program: Command): void { [projectDir], envVars, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); + if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode, + finishedAt, + jobType: 'audit', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'audit'), + scriptResult, + startedAt, + stderr, + stdout, + }); + } catch { + // Outcome persistence must not change command exit behavior. + } + } + if (exitCode === 0) { if (scriptResult?.status === 'queued') { spinner.succeed('Code audit queued — another job is currently running'); @@ -186,6 +212,24 @@ export function auditCommand(program: Command): void { process.exit(exitCode || 1); } } catch (err) { + try { + recordJobOutcome({ + config, + exitCode: 1, + finishedAt: Date.now(), + jobType: 'audit', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'audit'), + startedAt, + stderr: err instanceof Error ? 
err.message : String(err), + }); + } catch { + // Outcome persistence must not change command exit behavior. + } spinner.fail(`Code audit failed: ${err instanceof Error ? err.message : String(err)}`); process.exit(1); } diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts index 7bedbf3d..8b7d0a34 100644 --- a/packages/cli/src/commands/init.ts +++ b/packages/cli/src/commands/init.ts @@ -378,6 +378,7 @@ export function buildInitConfig(params: { }, audit: { ...defaults.audit }, analytics: { ...defaults.analytics }, + feedback: { ...defaults.feedback }, merger: { ...defaults.merger }, prResolver: { ...defaults.prResolver }, jobProviders: { ...defaults.jobProviders }, diff --git a/packages/cli/src/commands/merge.ts b/packages/cli/src/commands/merge.ts index 10a34586..4b2f7a13 100644 --- a/packages/cli/src/commands/merge.ts +++ b/packages/cli/src/commands/merge.ts @@ -23,6 +23,7 @@ import { formatProviderDisplay, maybeApplyCronSchedulingDelay, } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; import * as path from 'path'; /** @@ -49,9 +50,7 @@ export function buildEnvVars( env.NW_MERGER_MERGE_METHOD = config.merger.mergeMethod; env.NW_MERGER_MIN_REVIEW_SCORE = String(config.merger.minReviewScore); env.NW_MERGER_BRANCH_PATTERNS = ( - config.merger.branchPatterns.length > 0 - ? config.merger.branchPatterns - : config.branchPatterns + config.merger.branchPatterns.length > 0 ? config.merger.branchPatterns : config.branchPatterns ).join(','); env.NW_MERGER_REBASE_BEFORE_MERGE = config.merger.rebaseBeforeMerge ? 
'1' : '0'; env.NW_MERGER_MAX_PRS_PER_RUN = String(config.merger.maxPrsPerRun); @@ -186,12 +185,14 @@ export function mergeCommand(program: Command): void { spinner.start(); try { + const startedAt = Date.now(); await maybeApplyCronSchedulingDelay(config, 'merger', projectDir); const { exitCode, stdout, stderr } = await executeScriptWithOutput( scriptPath, [projectDir], envVars, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); if (exitCode === 0) { @@ -212,6 +213,32 @@ export function mergeCommand(program: Command): void { const notificationEvent = resolveMergeNotificationEvent(exitCode, mergedCount, failedCount); + if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode, + finishedAt, + jobType: 'merger', + metadata: { + failedCount, + mergedCount, + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + minReviewScore: config.merger.minReviewScore, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'merger'), + scriptResult, + startedAt, + stderr, + stdout, + }); + } catch { + // Outcome persistence must not change command exit behavior. 
+ } + } + if (notificationEvent) { await sendNotifications(config, { event: notificationEvent, diff --git a/packages/cli/src/commands/plan.ts b/packages/cli/src/commands/plan.ts index a52f6bdb..56c2214f 100644 --- a/packages/cli/src/commands/plan.ts +++ b/packages/cli/src/commands/plan.ts @@ -18,6 +18,7 @@ import { resolveJobProvider, } from '@night-watch/core'; import { buildBaseEnvVars } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; import * as path from 'path'; export interface IPlanOptions { @@ -85,9 +86,13 @@ export function planCommand(program: Command): void { header('Provider Invocation'); if (plannerProvider === 'claude') { - dim(` ${PROVIDER_COMMANDS[plannerProvider]} -p "" --dangerously-skip-permissions`); + dim( + ` ${PROVIDER_COMMANDS[plannerProvider]} -p "" --dangerously-skip-permissions`, + ); } else { - dim(` ${PROVIDER_COMMANDS[plannerProvider]} exec --yolo ""`); + dim( + ` ${PROVIDER_COMMANDS[plannerProvider]} exec --yolo ""`, + ); } header('Command'); @@ -100,6 +105,7 @@ export function planCommand(program: Command): void { const label = resolvedTask ? `Planning: ${resolvedTask}` : 'Running PRD planner...'; const spinner = createSpinner(label); spinner.start(); + const startedAt = Date.now(); try { const { exitCode, stdout, stderr } = await executeScriptWithOutput( @@ -108,10 +114,35 @@ export function planCommand(program: Command): void { envVars, { cwd: projectDir }, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); + try { + recordJobOutcome({ + config, + exitCode, + finishedAt, + jobType: 'planner', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + task: resolvedTask, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'planner'), + scriptResult, + startedAt, + stderr, + stdout, + }); + } catch { + // Outcome persistence must not change command exit behavior. 
+ } + if (exitCode === 0) { - spinner.succeed(`PRD planner complete — PRD written to ${path.join(projectDir, config.prdDir)}/`); + spinner.succeed( + `PRD planner complete — PRD written to ${path.join(projectDir, config.prdDir)}/`, + ); } else if (exitCode === 124) { spinner.fail('PRD planner timed out'); process.exit(1); @@ -121,6 +152,25 @@ export function planCommand(program: Command): void { process.exit(exitCode || 1); } } catch (err) { + try { + recordJobOutcome({ + config, + exitCode: 1, + finishedAt: Date.now(), + jobType: 'planner', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + task: resolvedTask, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'planner'), + startedAt, + stderr: err instanceof Error ? err.message : String(err), + }); + } catch { + // Outcome persistence must not change command exit behavior. + } spinner.fail(`PRD planner failed: ${err instanceof Error ? err.message : String(err)}`); process.exit(1); } diff --git a/packages/cli/src/commands/qa.ts b/packages/cli/src/commands/qa.ts index baf14ee1..1a4d5d7c 100644 --- a/packages/cli/src/commands/qa.ts +++ b/packages/cli/src/commands/qa.ts @@ -28,6 +28,7 @@ import { getTelegramStatusWebhooks, maybeApplyCronSchedulingDelay, } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; import * as path from 'path'; /** @@ -218,12 +219,14 @@ export function qaCommand(program: Command): void { spinner.start(); try { + const startedAt = Date.now(); await maybeApplyCronSchedulingDelay(config, 'qa', projectDir); const { exitCode, stdout, stderr } = await executeScriptWithOutput( scriptPath, [projectDir], envVars, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); if (exitCode === 0) { @@ -242,6 +245,27 @@ export function qaCommand(program: Command): void { // Send notifications (fire-and-forget, failures do not affect exit 
code) if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode, + finishedAt, + jobType: 'qa', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'qa'), + scriptResult, + startedAt, + stderr, + stdout, + }); + } catch { + // Outcome persistence must not change command exit behavior. + } + const skipNotification = !shouldSendQaNotification(scriptResult?.status); if (skipNotification) { diff --git a/packages/cli/src/commands/resolve.ts b/packages/cli/src/commands/resolve.ts index ad7c0bf2..18b64749 100644 --- a/packages/cli/src/commands/resolve.ts +++ b/packages/cli/src/commands/resolve.ts @@ -23,6 +23,7 @@ import { formatProviderDisplay, maybeApplyCronSchedulingDelay, } from './shared/env-builder.js'; +import { recordJobOutcome } from './shared/feedback.js'; import { execFileSync } from 'child_process'; import * as path from 'path'; @@ -210,12 +211,14 @@ export function resolveCommand(program: Command): void { spinner.start(); try { + const startedAt = Date.now(); await maybeApplyCronSchedulingDelay(config, 'pr-resolver', projectDir); const { exitCode, stdout, stderr } = await executeScriptWithOutput( scriptPath, [projectDir], envVars, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); if (exitCode === 0) { @@ -234,6 +237,29 @@ export function resolveCommand(program: Command): void { const notificationEvent = exitCode === 0 ? ('pr_resolver_completed' as const) : ('pr_resolver_failed' as const); + if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode, + finishedAt, + jobType: 'pr-resolver', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? 
resolveJobProvider(config, 'pr-resolver'), + scriptResult, + startedAt, + stderr, + stdout, + }); + } catch { + // Outcome persistence must not change command exit behavior. + } + } + await sendNotifications(config, { event: notificationEvent, projectName: path.basename(projectDir), diff --git a/packages/cli/src/commands/review.ts b/packages/cli/src/commands/review.ts index 0c095424..04980bfd 100644 --- a/packages/cli/src/commands/review.ts +++ b/packages/cli/src/commands/review.ts @@ -7,15 +7,20 @@ import { CLAUDE_MODEL_IDS, INightWatchConfig, PROVIDER_COMMANDS, + analyzeFeedbackOutcome, + buildProjectFeedbackPromptBlock, + buildSessionOutcomeInput, createSpinner, createTable, dim, executeScriptWithOutput, fetchPrDetailsByNumber, fetchReviewedPrDetails, + getRepositories, getScriptPath, header, info, + isFeedbackPromptEnabled, loadConfig, parseScriptResult, resolveJobProvider, @@ -27,7 +32,8 @@ import { formatProviderDisplay, maybeApplyCronSchedulingDelay, } from './shared/env-builder.js'; -import type { IPrDetails } from '@night-watch/core'; +import { getFeedbackAnalysisOptions, isFeedbackEnabled } from './shared/feedback.js'; +import type { IPrDetails, JobType } from '@night-watch/core'; import { execFileSync } from 'child_process'; import * as path from 'path'; @@ -130,6 +136,33 @@ export function buildReviewNotificationTargets( })); } +export function applyProjectFeedbackPromptEnv( + envVars: Record, + projectDir: string, + jobType: JobType, + markApplied = true, +): void { + delete envVars.NW_PROJECT_FEEDBACK_PROMPT; + const config = loadConfig(projectDir); + if (!isFeedbackPromptEnabled() || config.feedback?.enabled === false) { + return; + } + + try { + const { promptBlock } = buildProjectFeedbackPromptBlock( + getRepositories().sessionOutcomes, + projectDir, + jobType, + { markApplied, maxActiveAugmentations: config.feedback?.maxActiveAugmentations }, + ); + if (promptBlock.length > 0) { + envVars.NW_PROJECT_FEEDBACK_PROMPT = promptBlock; + } + } catch { + 
// Feedback prompt context must never block the primary reviewer path. + } +} + /** * Parse retry attempts from script result data. * Returns the number of attempts (defaults to 1 if not present or invalid). @@ -342,6 +375,7 @@ export function reviewCommand(program: Command): void { // Build environment variables const envVars = buildEnvVars(config, options); + applyProjectFeedbackPromptEnv(envVars, projectDir, 'reviewer', !options.dryRun); // Get the script path const scriptPath = getScriptPath('night-watch-pr-reviewer-cron.sh'); @@ -431,12 +465,14 @@ export function reviewCommand(program: Command): void { spinner.start(); try { + const startedAt = Date.now(); await maybeApplyCronSchedulingDelay(config, 'reviewer', projectDir); const { exitCode, stdout, stderr } = await executeScriptWithOutput( scriptPath, [projectDir], envVars, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); if (exitCode === 0) { @@ -453,6 +489,33 @@ export function reviewCommand(program: Command): void { // Send notifications (fire-and-forget, failures do not affect exit code) if (!options.dryRun) { + try { + const repository = getRepositories().sessionOutcomes; + const storedOutcome = repository.insertOutcome( + buildSessionOutcomeInput({ + exitCode, + finishedAt, + jobType: 'reviewer', + metadata: { + providerCommand: envVars.NW_PROVIDER_CMD, + providerLabel: envVars.NW_PROVIDER_LABEL, + }, + minReviewScore: config.minReviewScore, + projectPath: projectDir, + providerKey: envVars.NW_PROVIDER_KEY ?? resolveJobProvider(config, 'reviewer'), + scriptResult, + startedAt, + stderr, + stdout, + }), + ); + if (isFeedbackEnabled(config)) { + analyzeFeedbackOutcome(repository, storedOutcome, getFeedbackAnalysisOptions(config)); + } + } catch { + // Outcome persistence must not change command exit behavior. 
+ } + const shouldNotifyCompletion = shouldSendReviewCompletionNotification( exitCode, scriptResult?.status, diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index eb3dae26..d92a3727 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -10,6 +10,9 @@ import { IWebhookConfig, NotificationEvent, PROVIDER_COMMANDS, + analyzeFeedbackOutcome, + buildProjectFeedbackPromptBlock, + buildSessionOutcomeInput, createBoardProvider, createSpinner, createTable, @@ -17,9 +20,11 @@ import { executeScriptWithOutput, fetchPrDetails, fetchPrDetailsForBranch, + getRepositories, getScriptPath, header, info, + isFeedbackPromptEnabled, loadConfig, parseScriptResult, resolveJobProvider, @@ -30,7 +35,8 @@ import { warn, } from '@night-watch/core'; import { buildBaseEnvVars, maybeApplyCronSchedulingDelay } from './shared/env-builder.js'; -import type { IPrDetails } from '@night-watch/core'; +import { getFeedbackAnalysisOptions, isFeedbackEnabled } from './shared/feedback.js'; +import type { IPrDetails, JobType } from '@night-watch/core'; import * as fs from 'fs'; import * as path from 'path'; @@ -44,6 +50,19 @@ export interface IRunOptions { crossProjectFallback?: boolean; } +export interface IRunOutcomeRecordInput { + projectDir: string; + config: INightWatchConfig; + envVars: Record; + startedAt: number; + finishedAt: number; + exitCode: number; + stdout?: string; + stderr?: string; + scriptResult?: ReturnType; + metadata?: Record; +} + /** * Map executor exit/result state to a notification event. * Returns null when the run completed with no actionable work (skip/no-op). 
@@ -203,17 +222,37 @@ async function runCrossProjectFallback( let candidateConfig = loadConfig(candidate.path); candidateConfig = applyCliOverrides(candidateConfig, options); const envVars = buildEnvVars(candidateConfig, options); + applyProjectFeedbackPromptEnv(envVars, candidate.path, 'executor'); envVars.NW_CROSS_PROJECT_FALLBACK_ACTIVE = '1'; try { + const startedAt = Date.now(); const { exitCode, stdout, stderr } = await executeScriptWithOutput( scriptPath, [candidate.path], envVars, { cwd: candidate.path }, ); + const finishedAt = Date.now(); const scriptResult = parseScriptResult(`${stdout}\n${stderr}`); + try { + recordRunSessionOutcome({ + projectDir: candidate.path, + config: candidateConfig, + envVars, + startedAt, + finishedAt, + exitCode, + stdout, + stderr, + scriptResult, + metadata: { crossProjectFallback: true }, + }); + } catch { + // Outcome persistence must not change fallback execution behavior. + } + if (!options.dryRun) { await sendRunCompletionNotifications( candidateConfig, @@ -276,6 +315,58 @@ export function isRateLimitFallbackTriggered(resultData?: Record return resultData?.rate_limit_fallback === '1'; } +export function recordRunSessionOutcome(input: IRunOutcomeRecordInput): void { + const outcome = buildSessionOutcomeInput({ + projectPath: input.projectDir, + jobType: 'executor', + providerKey: input.envVars.NW_PROVIDER_KEY ?? resolveJobProvider(input.config, 'executor'), + startedAt: input.startedAt, + finishedAt: input.finishedAt, + exitCode: input.exitCode, + stdout: input.stdout, + stderr: input.stderr, + scriptResult: input.scriptResult, + metadata: { + providerCommand: input.envVars.NW_PROVIDER_CMD, + providerLabel: input.envVars.NW_PROVIDER_LABEL, + ...(input.metadata ?? 
/**
 * Set (or clear) `NW_PROJECT_FEEDBACK_PROMPT` in `envVars` for a job run.
 *
 * Any existing value is removed first, so a disabled feature, an empty prompt
 * block, or a failed lookup never leaves a stale prompt behind. The variable is
 * only set when the built prompt block is non-empty.
 *
 * This helper is best-effort: every failure (config loading, repository access,
 * prompt building) is swallowed so the primary job path is never blocked.
 *
 * @param envVars - Environment map that is mutated in place.
 * @param projectDir - Project directory used to load config and look up feedback.
 * @param jobType - Job type the prompt block is built for.
 * @param markApplied - Forwarded to `buildProjectFeedbackPromptBlock`; callers
 *   in this file pass `false` for dry runs.
 */
export function applyProjectFeedbackPromptEnv(
  envVars: Record<string, string>,
  projectDir: string,
  jobType: JobType,
  markApplied = true,
): void {
  delete envVars.NW_PROJECT_FEEDBACK_PROMPT;

  // Cheap global switch first: no need to load project config when it is off.
  if (!isFeedbackPromptEnabled()) {
    return;
  }

  try {
    // Config loading lives inside the try so a broken config file cannot abort
    // the job before it even starts.
    const config = loadConfig(projectDir);
    if (config.feedback?.enabled === false) {
      return;
    }

    const { promptBlock } = buildProjectFeedbackPromptBlock(
      getRepositories().sessionOutcomes,
      projectDir,
      jobType,
      { markApplied, maxActiveAugmentations: config.feedback?.maxActiveAugmentations },
    );
    if (promptBlock.length > 0) {
      envVars.NW_PROJECT_FEEDBACK_PROMPT = promptBlock;
    }
  } catch {
    // Feedback prompt context must never block the primary job path.
  }
}
/**
 * Translate the `feedback` section of the config into the option bag passed to
 * `analyzeFeedbackOutcome`.
 *
 * Each field falls back to its default independently, so a partially populated
 * `feedback` object (for example one assembled from individual environment
 * overrides) never produces `NaN` or `undefined` options.
 *
 * @param config - Loaded Night Watch configuration.
 * @returns Analysis options; `augmentationTtlMs` is `augmentationTtlDays`
 *   converted to milliseconds.
 */
export function getFeedbackAnalysisOptions(config: INightWatchConfig) {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const feedback: Partial<NonNullable<INightWatchConfig['feedback']>> = config.feedback ?? {};

  // Accept only real, finite numbers; anything else gets the documented default.
  const numberOr = (value: unknown, fallback: number): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : fallback;

  return {
    augmentationTtlMs: numberOr(feedback.augmentationTtlDays, 14) * MS_PER_DAY,
    confidenceThreshold: numberOr(feedback.confidenceThreshold, 0.75),
    maxActiveAugmentations: numberOr(feedback.maxActiveAugmentations, 3),
    successStreakToExpire: numberOr(feedback.successStreakToExpire, 3),
  };
}
(t: string) => + t + .replace(/^PRD:\s*/i, '') + .trim() + .toLowerCase(); const existingIssues = await provider.getAllIssues(); const existing = existingIssues.find( @@ -358,6 +363,7 @@ export function sliceCommand(program: Command): void { // Execute planner with spinner const spinner = createSpinner('Running Planner...'); spinner.start(); + const startedAt = Date.now(); try { await maybeApplyCronSchedulingDelay(config, 'slicer', projectDir); @@ -395,6 +401,30 @@ export function sliceCommand(program: Command): void { const nothingPending = result.error === 'No pending items to process'; const exitCode = result.sliced || nothingPending ? 0 : 1; + if (!options.dryRun) { + try { + recordJobOutcome({ + config, + exitCode, + finishedAt: Date.now(), + jobType: 'planner', + metadata: { + error: result.error ?? null, + file: result.file ?? null, + itemTitle: result.item?.title ?? null, + sliced: result.sliced, + }, + projectDir, + providerKey: resolveJobProvider(config, 'slicer'), + startedAt, + stderr: result.error, + stdout: result.file ? `Created ${result.file}` : undefined, + }); + } catch { + // Outcome persistence must not change command exit behavior. + } + } + if (!options.dryRun && result.sliced) { await sendNotifications(config, { event: 'run_succeeded', @@ -414,6 +444,23 @@ export function sliceCommand(program: Command): void { process.exit(exitCode); } catch (err) { + try { + recordJobOutcome({ + config, + exitCode: 1, + finishedAt: Date.now(), + jobType: 'planner', + metadata: { + error: err instanceof Error ? err.message : String(err), + }, + projectDir, + providerKey: resolveJobProvider(config, 'slicer'), + startedAt, + stderr: err instanceof Error ? err.message : String(err), + }); + } catch { + // Outcome persistence must not change command exit behavior. + } spinner.fail('Failed to execute planner command'); uiError(`${err instanceof Error ? 
err.message : String(err)}`); process.exit(1); diff --git a/packages/core/src/__tests__/config.test.ts b/packages/core/src/__tests__/config.test.ts index 7d76b248..95ab29c1 100644 --- a/packages/core/src/__tests__/config.test.ts +++ b/packages/core/src/__tests__/config.test.ts @@ -73,6 +73,13 @@ describe('config', () => { expect(config.reviewerSchedule).toBe('25 */3 * * *'); expect(config.reviewerMaxPrsPerRun).toBe(0); expect(config.scheduleBundleId).toBe('always-on'); + expect(config.feedback).toEqual({ + enabled: true, + confidenceThreshold: 0.75, + augmentationTtlDays: 14, + maxActiveAugmentations: 3, + successStreakToExpire: 3, + }); }); it('should return defaults with provider and reviewerEnabled', () => { @@ -490,6 +497,34 @@ describe('config', () => { expect(config.reviewerMaxPrsPerRun).toBe(4); }); + it('should handle feedback config and env overrides', () => { + fs.writeFileSync( + path.join(tempDir, 'night-watch.config.json'), + JSON.stringify({ + feedback: { + enabled: false, + confidenceThreshold: 0.5, + augmentationTtlDays: 7, + maxActiveAugmentations: 2, + successStreakToExpire: 4, + }, + }), + ); + process.env.NW_FEEDBACK_ENABLED = 'true'; + process.env.NW_FEEDBACK_CONFIDENCE_THRESHOLD = '0.9'; + process.env.NW_FEEDBACK_MAX_ACTIVE_AUGMENTATIONS = '5'; + + const config = loadConfig(tempDir); + + expect(config.feedback).toEqual({ + enabled: true, + confidenceThreshold: 0.9, + augmentationTtlDays: 7, + maxActiveAugmentations: 5, + successStreakToExpire: 4, + }); + }); + it('should handle NW_REVIEWER_MAX_RETRIES=0 env var', () => { process.env.NW_REVIEWER_MAX_RETRIES = '0'; diff --git a/packages/core/src/__tests__/feedback/outcome-parser.test.ts b/packages/core/src/__tests__/feedback/outcome-parser.test.ts new file mode 100644 index 00000000..491850c6 --- /dev/null +++ b/packages/core/src/__tests__/feedback/outcome-parser.test.ts @@ -0,0 +1,76 @@ +/** + * Tests for structured outcome parsing. 
+ */ + +import { describe, expect, it } from 'vitest'; + +import { buildSessionOutcomeInput, classifyFailure } from '../../feedback/outcome-parser.js'; +import { parseScriptResult } from '../../utils/script-result.js'; + +describe('outcome parser', () => { + it('should classify TypeScript errors', () => { + const stderr = ` +packages/core/src/feedback/outcome-parser.ts:42:7 - error TS2322: Type 'string' is not assignable to type 'number'. +`; + + const result = classifyFailure({ + projectPath: '/tmp/night-watch', + stderr, + }); + + expect(result.category).toBe('typescript'); + expect(result.failureSignature).toContain('typescript|packages/core/src'); + expect(result.failureSignature).toContain('ts2322'); + }); + + it.each([ + ['test', 'FAIL src/example.test.ts > expected true to be false'], + ['ci', 'GitHub Actions required check failed with action_required'], + ['review-score', 'review score below threshold: final_score=72'], + ['rate-limit', '429 rate limit exceeded by provider'], + ['timeout', 'operation timed out with exit code 124'], + ['conflict', 'Automatic merge failed; fix conflicts and then commit the result.'], + ['unknown', 'provider exited without a recognized failure marker'], + ] as const)('should classify %s failures', (expectedCategory, stderr) => { + const result = classifyFailure({ + projectPath: '/tmp/night-watch', + stderr, + exitCode: expectedCategory === 'timeout' ? 124 : 1, + minReviewScore: expectedCategory === 'review-score' ? 80 : undefined, + scriptResult: + expectedCategory === 'review-score' + ? 
parseScriptResult('NIGHT_WATCH_RESULT:failure|final_score=72') + : null, + }); + + expect(result.category).toBe(expectedCategory); + expect(result.failureSignature).toContain(`${expectedCategory}|`); + }); + + it('should classify ESLint errors', () => { + const stdout = ` +/tmp/night-watch/packages/cli/src/commands/run.ts + 12:8 error 'unused' is assigned a value but never used @typescript-eslint/no-unused-vars + +✖ 1 problem (1 error, 0 warnings) +`; + + const result = buildSessionOutcomeInput({ + projectPath: '/tmp/night-watch', + jobType: 'executor', + providerKey: 'codex', + startedAt: 1_700_000_000_000, + finishedAt: 1_700_000_001_500, + exitCode: 1, + stdout, + scriptResult: parseScriptResult('NIGHT_WATCH_RESULT:failure|prd=97.md|branch=nw-97'), + }); + + expect(result.outcome).toBe('failure'); + expect(result.failureCategory).toBe('eslint'); + expect(result.failureSignature).toContain('eslint|packages/cli/src'); + expect(result.durationSeconds).toBe(2); + expect(result.prdFile).toBe('97.md'); + expect(result.branchName).toBe('nw-97'); + }); +}); diff --git a/packages/core/src/__tests__/feedback/pattern-analyzer.test.ts b/packages/core/src/__tests__/feedback/pattern-analyzer.test.ts new file mode 100644 index 00000000..600ff456 --- /dev/null +++ b/packages/core/src/__tests__/feedback/pattern-analyzer.test.ts @@ -0,0 +1,87 @@ +/** + * Tests for feedback pattern detection and activation. 
+ */ + +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; + +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { analyzeFeedbackOutcome } from '../../feedback/pattern-analyzer.js'; +import { SqliteSessionOutcomeRepository } from '../../storage/repositories/sqlite/session-outcome.repository.js'; +import { runMigrations } from '../../storage/sqlite/migrations.js'; + +describe('feedback pattern analyzer', () => { + let db: Database.Database; + let repo: SqliteSessionOutcomeRepository; + let tempDir: string; + + beforeEach(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nw-pattern-analyzer-test-')); + db = new Database(path.join(tempDir, 'test.db')); + db.pragma('journal_mode = WAL'); + runMigrations(db); + repo = new SqliteSessionOutcomeRepository(db); + }); + + afterEach(() => { + db.close(); + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + it('should activate pattern after repeated failures', () => { + const firstOutcome = repo.insertOutcome({ + failureCategory: 'test', + failureSignature: 'test|packages/core/src|expected true to be false', + finishedAt: 1_700_000_010, + jobType: 'executor', + metadata: { + fileArea: 'packages/core/src', + firstErrorLine: 'expected true to be false', + }, + outcome: 'failure', + projectPath: '/tmp/project', + providerKey: 'codex', + startedAt: 1_700_000_000, + }); + + const firstResult = analyzeFeedbackOutcome(repo, firstOutcome, { + now: 1_700_000_010, + }); + expect(firstResult.pattern?.status).toBe('observing'); + expect(repo.listActiveAugmentations('/tmp/project', 'executor', 1_700_000_010)).toHaveLength(0); + + const secondOutcome = repo.insertOutcome({ + failureCategory: 'test', + failureSignature: 'test|packages/core/src|expected true to be false', + finishedAt: 1_700_000_030, + jobType: 'executor', + metadata: { + fileArea: 'packages/core/src', + firstErrorLine: 'expected true to be false', + }, 
+ outcome: 'failure', + projectPath: '/tmp/project', + providerKey: 'codex', + startedAt: 1_700_000_020, + }); + + const secondResult = analyzeFeedbackOutcome(repo, secondOutcome, { + now: 1_700_000_030, + }); + const activeAugmentations = repo.listActiveAugmentations( + '/tmp/project', + 'executor', + 1_700_000_030, + ); + + expect(secondResult.pattern?.sampleCount).toBe(2); + expect(secondResult.pattern?.status).toBe('active'); + expect(secondResult.pattern?.confidence).toBeGreaterThanOrEqual(0.75); + expect(activeAugmentations).toHaveLength(1); + expect(activeAugmentations[0].promptText).toContain('Provenance: pattern #'); + expect(activeAugmentations[0].promptText).toContain('samples=2'); + }); +}); diff --git a/packages/core/src/__tests__/feedback/prompt-augmenter.test.ts b/packages/core/src/__tests__/feedback/prompt-augmenter.test.ts new file mode 100644 index 00000000..bfe1334f --- /dev/null +++ b/packages/core/src/__tests__/feedback/prompt-augmenter.test.ts @@ -0,0 +1,60 @@ +/** + * Tests for project feedback prompt augmentation rendering. 
+ */ + +import { describe, expect, it } from 'vitest'; + +import { + renderProjectFeedbackBlock, + selectPromptAugmentations, +} from '../../feedback/prompt-augmenter.js'; +import type { IPromptAugmentation } from '../../types.js'; + +function makeAugmentation( + id: number, + promptText: string, + status: IPromptAugmentation['status'] = 'active', +): IPromptAugmentation { + return { + appliedCount: 0, + createdAt: id, + expiresAt: null, + id, + jobType: 'executor', + patternId: id, + projectPath: '/tmp/project', + promptText, + status, + successCount: 0, + updatedAt: id, + }; +} + +describe('prompt augmenter', () => { + it('should cap active prompt snippets', () => { + const augmentations = [ + makeAugmentation(1, 'first repeated failure note'), + makeAugmentation(2, 'second repeated failure note'), + makeAugmentation(3, 'third repeated failure note'), + makeAugmentation(4, 'fourth repeated failure note'), + ]; + + const selected = selectPromptAugmentations(augmentations); + const block = renderProjectFeedbackBlock(augmentations); + + expect(selected.map((augmentation) => augmentation.id)).toEqual([1, 2, 3]); + expect(block).toContain('## Project Feedback'); + expect(block).toContain('first repeated failure note'); + expect(block).toContain('third repeated failure note'); + expect(block).not.toContain('fourth repeated failure note'); + }); + + it('should render prompt block only when augmentations are active', () => { + expect(renderProjectFeedbackBlock([])).toBe(''); + expect(renderProjectFeedbackBlock([makeAugmentation(1, 'paused note', 'paused')])).toBe(''); + + const block = renderProjectFeedbackBlock([makeAugmentation(1, 'active note')]); + expect(block).toContain('## Project Feedback'); + expect(block).toContain('active note'); + }); +}); diff --git a/packages/core/src/__tests__/feedback/session-outcomes.test.ts b/packages/core/src/__tests__/feedback/session-outcomes.test.ts new file mode 100644 index 00000000..38b5d5ee --- /dev/null +++ 
b/packages/core/src/__tests__/feedback/session-outcomes.test.ts @@ -0,0 +1,116 @@ +/** + * Tests for structured session outcome storage. + */ + +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; + +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { SqliteSessionOutcomeRepository } from '../../storage/repositories/sqlite/session-outcome.repository.js'; +import { runMigrations } from '../../storage/sqlite/migrations.js'; + +describe('SqliteSessionOutcomeRepository', () => { + let db: Database.Database; + let repo: SqliteSessionOutcomeRepository; + let tempDir: string; + + beforeEach(() => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nw-session-outcomes-test-')); + db = new Database(path.join(tempDir, 'test.db')); + db.pragma('journal_mode = WAL'); + runMigrations(db); + repo = new SqliteSessionOutcomeRepository(db); + }); + + afterEach(() => { + db.close(); + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + it('should record structured session outcome', () => { + const outcome = repo.insertOutcome({ + projectPath: '/tmp/project', + jobType: 'executor', + providerKey: 'codex', + prdFile: 'docs/prds/example.md', + prNumber: 97, + branchName: 'night-watch/nw-97', + startedAt: 1_700_000_000, + finishedAt: 1_700_000_090, + durationSeconds: 90, + outcome: 'failure', + exitCode: 1, + attempt: 2, + retryCount: 1, + failureCategory: 'tests', + failureSignature: 'vitest failed in session-outcomes.test.ts', + metadata: { + command: 'yarn workspace @night-watch/core test', + failures: ['expected true to be false'], + }, + }); + + expect(outcome.id).toBeGreaterThan(0); + expect(outcome.projectPath).toBe('/tmp/project'); + expect(outcome.jobType).toBe('executor'); + expect(outcome.providerKey).toBe('codex'); + expect(outcome.prdFile).toBe('docs/prds/example.md'); + expect(outcome.prNumber).toBe(97); + expect(outcome.durationSeconds).toBe(90); + 
expect(outcome.outcome).toBe('failure'); + expect(outcome.attempt).toBe(2); + expect(outcome.retryCount).toBe(1); + expect(outcome.metadata).toEqual({ + command: 'yarn workspace @night-watch/core test', + failures: ['expected true to be false'], + }); + + const queried = repo.queryOutcomes({ projectPath: '/tmp/project', jobType: 'executor' }); + expect(queried).toHaveLength(1); + expect(queried[0]).toEqual(outcome); + + const summary = repo.querySummary({ projectPath: '/tmp/project' }); + expect(summary.totalCount).toBe(1); + expect(summary.failureCount).toBe(1); + expect(summary.byFailureCategory).toEqual({ tests: 1 }); + expect(summary.averageDurationSeconds).toBe(90); + }); + + it('should redact secrets in metadata', () => { + const outcome = repo.insertOutcome({ + projectPath: '/tmp/project', + jobType: 'reviewer', + providerKey: 'claude', + startedAt: 1_700_000_000, + finishedAt: 1_700_000_030, + outcome: 'failure', + metadata: { + apiKey: 'sk-1234567890abcdefghijklmnopqrstuvwxyz', + nested: { + authorization: 'Bearer secret-token-value-12345', + log: 'request failed with token=ghp_abcdefghijklmnopqrstuvwxyz1234567890ABCD', + }, + safe: 'keep this value', + }, + }); + + expect(outcome.metadata).toEqual({ + apiKey: '[REDACTED_SECRET]', + nested: { + authorization: '[REDACTED_SECRET]', + log: 'request failed with token=[REDACTED_SECRET]', + }, + safe: 'keep this value', + }); + + const raw = db + .prepare('SELECT metadata_json FROM session_outcomes WHERE id = ?') + .get(outcome.id) as { metadata_json: string }; + expect(raw.metadata_json).not.toContain('sk-1234567890abcdefghijklmnopqrstuvwxyz'); + expect(raw.metadata_json).not.toContain('secret-token-value-12345'); + expect(raw.metadata_json).not.toContain('ghp_abcdefghijklmnopqrstuvwxyz1234567890ABCD'); + }); +}); diff --git a/packages/core/src/__tests__/storage/sqlite/migrations.test.ts b/packages/core/src/__tests__/storage/sqlite/migrations.test.ts index 24592aa3..c8fcfdeb 100644 --- 
a/packages/core/src/__tests__/storage/sqlite/migrations.test.ts +++ b/packages/core/src/__tests__/storage/sqlite/migrations.test.ts @@ -13,14 +13,17 @@ import { runMigrations } from '../../../storage/sqlite/migrations.js'; const EXPECTED_TABLES = [ 'agent_personas', 'execution_history', + 'feedback_patterns', 'job_queue', 'job_runs', 'kanban_comments', 'kanban_issues', 'prd_states', 'projects', + 'prompt_augmentations', 'roadmap_states', 'schema_meta', + 'session_outcomes', ]; let tmpDir: string; @@ -97,9 +100,12 @@ describe('runMigrations', () => { it('creates job_runs table with correct columns', () => { runMigrations(db); - const columns = db - .prepare(`PRAGMA table_info(job_runs)`) - .all() as Array<{ name: string; type: string; notnull: number; dflt_value: string | null }>; + const columns = db.prepare(`PRAGMA table_info(job_runs)`).all() as Array<{ + name: string; + type: string; + notnull: number; + dflt_value: string | null; + }>; const colNames = columns.map((c) => c.name); expect(colNames).toEqual( @@ -130,9 +136,7 @@ describe('runMigrations', () => { it('creates job_queue table without pressure columns', () => { runMigrations(db); - const columns = db - .prepare(`PRAGMA table_info(job_queue)`) - .all() as Array<{ name: string }>; + const columns = db.prepare(`PRAGMA table_info(job_queue)`).all() as Array<{ name: string }>; const colNames = columns.map((c) => c.name); @@ -153,4 +157,53 @@ describe('runMigrations', () => { expect(indexes.map((i) => i.name)).toContain('idx_job_runs_lookup'); }); + + it('creates feedback-loop tables and lookup indexes', () => { + runMigrations(db); + + const outcomeColumns = db.prepare(`PRAGMA table_info(session_outcomes)`).all() as Array<{ + name: string; + dflt_value: string | null; + }>; + const outcomeIndexes = db + .prepare(`SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='session_outcomes'`) + .all() as Array<{ name: string }>; + const patternIndexes = db + .prepare(`SELECT name FROM sqlite_master WHERE 
type='index' AND tbl_name='feedback_patterns'`) + .all() as Array<{ name: string }>; + const augmentationIndexes = db + .prepare( + `SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='prompt_augmentations'`, + ) + .all() as Array<{ name: string }>; + + expect(outcomeColumns.map((c) => c.name)).toEqual( + expect.arrayContaining([ + 'id', + 'project_path', + 'job_type', + 'provider_key', + 'prd_file', + 'pr_number', + 'branch_name', + 'started_at', + 'finished_at', + 'duration_seconds', + 'outcome', + 'exit_code', + 'attempt', + 'retry_count', + 'review_score', + 'ci_status', + 'failure_category', + 'failure_signature', + 'metadata_json', + ]), + ); + expect(outcomeColumns.find((c) => c.name === 'attempt')?.dflt_value).toBe('1'); + expect(outcomeColumns.find((c) => c.name === 'retry_count')?.dflt_value).toBe('0'); + expect(outcomeIndexes.map((i) => i.name)).toContain('idx_session_outcomes_lookup'); + expect(patternIndexes.map((i) => i.name)).toContain('idx_feedback_patterns_lookup'); + expect(augmentationIndexes.map((i) => i.name)).toContain('idx_prompt_augmentations_active'); + }); }); diff --git a/packages/core/src/config-env.ts b/packages/core/src/config-env.ts index 1c39f752..a795686a 100644 --- a/packages/core/src/config-env.ts +++ b/packages/core/src/config-env.ts @@ -103,6 +103,38 @@ export function buildEnvOverrideConfig( const v = parseInt(process.env.NW_REVIEWER_MAX_PRS_PER_RUN, 10); if (!isNaN(v) && v >= 0) env.reviewerMaxPrsPerRun = v; } + if ( + process.env.NW_FEEDBACK_ENABLED !== undefined || + process.env.NW_FEEDBACK_CONFIDENCE_THRESHOLD !== undefined || + process.env.NW_FEEDBACK_AUGMENTATION_TTL_DAYS !== undefined || + process.env.NW_FEEDBACK_MAX_ACTIVE_AUGMENTATIONS !== undefined || + process.env.NW_FEEDBACK_SUCCESS_STREAK_TO_EXPIRE !== undefined + ) { + const feedback = { ...(fileConfig?.feedback ?? {}) }; + const enabled = process.env.NW_FEEDBACK_ENABLED + ? 
parseBoolean(process.env.NW_FEEDBACK_ENABLED) + : null; + if (enabled !== null) feedback.enabled = enabled; + const confidenceThreshold = parseFloat(process.env.NW_FEEDBACK_CONFIDENCE_THRESHOLD ?? ''); + if (!Number.isNaN(confidenceThreshold)) feedback.confidenceThreshold = confidenceThreshold; + const augmentationTtlDays = parseInt(process.env.NW_FEEDBACK_AUGMENTATION_TTL_DAYS ?? '', 10); + if (!Number.isNaN(augmentationTtlDays)) feedback.augmentationTtlDays = augmentationTtlDays; + const maxActiveAugmentations = parseInt( + process.env.NW_FEEDBACK_MAX_ACTIVE_AUGMENTATIONS ?? '', + 10, + ); + if (!Number.isNaN(maxActiveAugmentations)) { + feedback.maxActiveAugmentations = maxActiveAugmentations; + } + const successStreakToExpire = parseInt( + process.env.NW_FEEDBACK_SUCCESS_STREAK_TO_EXPIRE ?? '', + 10, + ); + if (!Number.isNaN(successStreakToExpire)) { + feedback.successStreakToExpire = successStreakToExpire; + } + env.feedback = feedback as INightWatchConfig['feedback']; + } if (process.env.NW_PROVIDER) { const p = validateProvider(process.env.NW_PROVIDER); diff --git a/packages/core/src/config-normalize.ts b/packages/core/src/config-normalize.ts index 5bca9019..e18a9f78 100644 --- a/packages/core/src/config-normalize.ts +++ b/packages/core/src/config-normalize.ts @@ -94,6 +94,16 @@ export function normalizeConfig(rawConfig: Record): Partial; +} + +export interface IFailureClassification { + category: FailureCategory; + failureSignature: string; + fileArea: string | null; + firstErrorLine: string | null; +} + +export interface IFailureClassificationInput { + projectPath: string; + stdout?: string; + stderr?: string; + scriptResult?: IScriptResult | null; + minReviewScore?: number; + exitCode?: number; +} + +interface IClassifierRule { + category: FailureCategory; + pattern: RegExp; +} + +const SECRET_PLACEHOLDER = '[REDACTED_SECRET]'; +const MAX_SIGNATURE_LENGTH = 240; +const MAX_ERROR_LINE_LENGTH = 300; +const ANSI_PATTERN = new 
RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, 'g'); +const FILE_PATH_PATTERN = /\.(?:[cm]?[jt]sx?|json|md|css|scss|ya?ml)$/i; +const TOKEN_SPLIT_PATTERN = /[\s('"`]+/; + +const SECRET_TEXT_PATTERNS: Array<[RegExp, string]> = [ + [ + /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, + SECRET_PLACEHOLDER, + ], + [/\bsk-ant-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + [/\bsk-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + [/\bgh[opsru]_\w{30,}\b/g, SECRET_PLACEHOLDER], + [/\bxox[baprs]-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + [/\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/g, SECRET_PLACEHOLDER], + [/\b(Bearer|Basic)\s+[\w.~+/=-]{12,}/gi, `$1 ${SECRET_PLACEHOLDER}`], + [/\b(token|api[_-]?key|password|secret)=["']?[\w.~+/=-]{12,}/gi, `$1=${SECRET_PLACEHOLDER}`], +]; + +const CLASSIFIER_RULES: IClassifierRule[] = [ + { + category: 'timeout', + pattern: + /\b(timed?\s*out|timeout|etimedout|operation was aborted|exit code 124|signal sigterm)\b/i, + }, + { + category: 'rate-limit', + pattern: + /\b(429|rate[- ]?limit(?:ed)?|too many requests|quota exceeded|resource_exhausted|overloaded)\b/i, + }, + { + category: 'conflict', + pattern: + /\b(merge conflict|conflict \(|conflict:|unmerged files|needs merge|automatic merge failed|both modified:)\b/i, + }, + { + category: 'typescript', + pattern: /\b(TS\d{4}|typescript error|tsc\b.*(?:failed|error)|error TS\d{4})\b/i, + }, + { + category: 'eslint', + pattern: + /\b(eslint|@typescript-eslint|no-unused-vars|no-explicit-any|react-hooks\/rules-of-hooks)\b/i, + }, + { + category: 'test', + pattern: /\b(vitest|jest|playwright|cypress|mocha|assertionerror)\b/i, + }, + { + category: 'test', + pattern: /\b(test files?|tests?)\b.*\bfailed\b/i, + }, + { + category: 'test', + pattern: /\b(expect\(|locator\(|FAIL\s+\S+\.(?:test|spec)\.)/i, + }, + { + category: 'review-score', + pattern: + /\b(review score|final_score|score)\b.*\b(below|minimum|min|required|threshold|failed|miss)\b/i, + }, + { + category: 'ci', + pattern: + /\b(ci|github 
/**
 * Cap `value` at `maxLength` characters, marking truncation with a trailing `...`.
 *
 * Strings that already fit are returned unchanged. When the limit is too small
 * to hold any text plus the three-character marker, the marker itself is cut to
 * the limit so the result never exceeds `maxLength`.
 *
 * @param value - Text to cap.
 * @param maxLength - Maximum length of the returned string.
 * @returns `value` itself, or a truncated copy no longer than `maxLength`.
 */
function trimLine(value: string, maxLength: number): string {
  const ELLIPSIS = '...';
  if (value.length <= maxLength) {
    return value;
  }
  if (maxLength < ELLIPSIS.length) {
    // A negative slice end would count from the end of the string and return
    // more characters than allowed, so cut the marker to size instead.
    return ELLIPSIS.slice(0, Math.max(0, maxLength));
  }
  return `${value.slice(0, maxLength - ELLIPSIS.length)}${ELLIPSIS}`;
}
/**
 * Find the first file-path-looking token in an output line.
 *
 * Backslashes in both the line and the project path are converted to forward
 * slashes. If the line contains `<projectPath>/`, the text after that prefix is
 * searched first so the result is relative to the project; otherwise (or when
 * that search finds nothing) the whole line is searched.
 *
 * @param line - A single line of job output.
 * @param projectPath - Project root; an empty string skips the prefix search.
 * @returns The matched path token, or `null` when no token looks like a file path.
 */
function extractFilePath(line: string, projectPath: string): string | null {
  const toForwardSlashes = (text: string): string => text.replaceAll('\\', '/');
  const posixLine = toForwardSlashes(line);
  const posixRoot = toForwardSlashes(projectPath);

  const rootPrefix = `${posixRoot}/`;
  const prefixAt = posixRoot ? posixLine.indexOf(rootPrefix) : -1;
  if (prefixAt >= 0) {
    const afterRoot = posixLine.slice(prefixAt + rootPrefix.length);
    const insideProject = extractFilePathToken(afterRoot.split(TOKEN_SPLIT_PATTERN));
    if (insideProject) {
      return insideProject;
    }
  }

  return extractFilePathToken(posixLine.split(TOKEN_SPLIT_PATTERN));
}
/**
 * Returns the index of the first `:` that is directly followed by an ASCII
 * digit (the start of a `:line[:col]` suffix), or -1 when there is none.
 */
function findLocationIndex(value: string): number {
  return value.search(/:\d/);
}

/**
 * Maps a file path to a coarse "area": its first three directory segments at
 * most.
 *
 * @returns null for a missing path, `.` for a path without a directory part,
 *   otherwise the leading directories joined with `/`.
 */
function filePathToArea(filePath: string | null): string | null {
  if (!filePath) {
    return null;
  }

  const directories = filePath.split('/').filter(Boolean).slice(0, -1);
  return directories.length === 0 ? '.' : directories.slice(0, 3).join('/');
}

/**
 * Picks the line that best represents the failure.
 *
 * Preference order: the first line matched by any classifier rule of
 * `category`, then the first line containing a generic failure keyword, then
 * the first line of output, then null.
 *
 * Every rule registered for the category is consulted, so the chosen line
 * stays consistent with classifyCategory, which can also classify through any
 * of them.
 */
function findFirstMatchingLine(lines: string[], category: FailureCategory): string | null {
  const categoryPatterns = CLASSIFIER_RULES.filter((entry) => entry.category === category).map(
    (entry) => entry.pattern,
  );
  if (categoryPatterns.length > 0) {
    const matched = lines.find((line) => categoryPatterns.some((pattern) => pattern.test(line)));
    if (matched) {
      return matched;
    }
  }

  return (
    lines.find((line) =>
      /\b(error|failed|failure|fatal|exception|conflict|timeout)\b/i.test(line),
    ) ??
    lines[0] ??
    null
  );
}

/**
 * Derives the failure category for a run.
 *
 * Structured signals win over text heuristics, in this order: timeout (exit
 * code 124 or status `timeout`), rate limit (status or fallback flag), review
 * score below `minReviewScore`, then the first CLASSIFIER_RULES pattern that
 * matches the output lines joined with the status, reason and detail.
 * Falls back to `unknown`.
 */
function classifyCategory(
  lines: string[],
  scriptResult: IScriptResult | null | undefined,
  minReviewScore: number | undefined,
  exitCode: number | undefined,
): FailureCategory {
  const status = scriptResult?.status ?? '';
  const data = scriptResult?.data ?? {};

  if (exitCode === 124 || status === 'timeout') {
    return 'timeout';
  }
  if (status === 'rate_limited' || data.rate_limit_fallback === '1') {
    return 'rate-limit';
  }

  const reviewScore = parseOptionalNumber(data.final_score ?? data.review_score);
  const hasThreshold = minReviewScore != null && Number.isFinite(minReviewScore);
  if (reviewScore != null && hasThreshold && reviewScore < minReviewScore) {
    return 'review-score';
  }

  const combined = [...lines, status, data.reason ?? '', data.detail ?? ''].join('\n');
  const matchedRule = CLASSIFIER_RULES.find((rule) => rule.pattern.test(combined));
  return matchedRule ? matchedRule.category : 'unknown';
}
/**
 * Classifies a finished run and builds its failure signature.
 *
 * The signature has the form `category|area|normalized line`, capped at
 * MAX_SIGNATURE_LENGTH. The area comes from the file path found in the
 * representative error line, or failing that from the first output line that
 * contains a path.
 */
export function classifyFailure(input: IFailureClassificationInput): IFailureClassification {
  const { projectPath } = input;
  const lines = getOutputLines(input.stdout, input.stderr);
  const category = classifyCategory(
    lines,
    input.scriptResult,
    input.minReviewScore,
    input.exitCode,
  );
  const firstErrorLine = findFirstMatchingLine(lines, category);

  let filePath: string | null = firstErrorLine
    ? extractFilePath(firstErrorLine, projectPath)
    : null;
  if (filePath == null) {
    filePath =
      lines.map((line) => extractFilePath(line, projectPath)).find((value) => value != null) ??
      null;
  }

  const fileArea = filePathToArea(filePath);
  const signatureLine = firstErrorLine
    ? normalizeLine(firstErrorLine, projectPath)
    : 'no-error-line';

  return {
    category,
    failureSignature: trimLine(
      `${category}|${fileArea ?? 'unknown-area'}|${signatureLine}`,
      MAX_SIGNATURE_LENGTH,
    ),
    fileArea,
    firstErrorLine: firstErrorLine ? trimLine(firstErrorLine, MAX_ERROR_LINE_LENGTH) : null,
  };
}

/**
 * Leniently parses an integer from a marker value.
 *
 * Surrounding whitespace and one leading `#` are ignored; parsing follows
 * `parseInt` semantics, so trailing non-digits are tolerated.
 *
 * @returns The parsed integer, or null for empty or non-numeric input.
 */
function parseOptionalNumber(value: string | undefined): number | null {
  if (!value) {
    return null;
  }
  const parsed = parseInt(value.trim().replace(/^#/, ''), 10);
  if (Number.isNaN(parsed)) {
    return null;
  }
  return parsed;
}

/**
 * Extracts the first pull-request number reported by the script.
 *
 * Checked in order: the direct number fields, the `/pull/<n>` segment of
 * `pr_url`, then the first numeric entry of the comma-separated `prs` (or
 * `auto_merged`) list.
 */
function parseFirstPrNumber(scriptResult: IScriptResult | null | undefined): number | null {
  const data = scriptResult?.data ?? {};

  for (const raw of [data.pr_number, data.prNumber, data.pr, data.failed_pr]) {
    const direct = parseOptionalNumber(raw);
    if (direct != null) {
      return direct;
    }
  }

  const urlMatch = data.pr_url?.match(/\/pull\/(\d+)/);
  if (urlMatch?.[1]) {
    return parseOptionalNumber(urlMatch[1]);
  }

  const prsRaw = data.prs ?? data.auto_merged;
  if (!prsRaw) {
    return null;
  }
  for (const token of prsRaw.split(',')) {
    const parsed = parseOptionalNumber(token);
    if (parsed != null) {
      return parsed;
    }
  }
  return null;
}
/**
 * Determines which attempt the run ended on.
 *
 * A positive `attempt` / `attempts` marker wins. Otherwise the highest attempt
 * number announced in the output (`ATTEMPT: n/` or `Starting attempt n/`) is
 * used, with 1 as the floor.
 */
function parseAttemptCount(
  scriptResult: IScriptResult | null | undefined,
  lines: string[],
): number {
  const reported =
    parseOptionalNumber(scriptResult?.data.attempt) ??
    parseOptionalNumber(scriptResult?.data.attempts);
  if (reported != null && reported > 0) {
    return reported;
  }

  return lines.reduce((highest, line) => {
    const match = /\bATTEMPT:\s*(\d+)\//i.exec(line) ?? /\bStarting attempt\s+(\d+)\//i.exec(line);
    return match?.[1] ? Math.max(highest, parseInt(match[1], 10)) : highest;
  }, 1);
}

/**
 * Returns the reported `retry_count` when it is a non-negative number,
 * otherwise infers it as `attempt - 1` (never below zero).
 */
function parseRetryCount(scriptResult: IScriptResult | null | undefined, attempt: number): number {
  const reported = parseOptionalNumber(scriptResult?.data.retry_count);
  return reported != null && reported >= 0 ? reported : Math.max(0, attempt - 1);
}

/**
 * Reports whether the result marker itself signals a failure.
 *
 * True when any failure counter (`failed`, `prs_failed`, `failed_count`) is
 * positive, or when any of `failed_pr`, `auto_merge_failed`,
 * `failed_automation` holds a non-blank value other than "none".
 */
function markerIndicatesFailure(scriptResult: IScriptResult | null | undefined): boolean {
  const data = scriptResult?.data ?? {};

  const counters = [data.failed, data.prs_failed, data.failed_count];
  if (counters.some((raw) => (parseOptionalNumber(raw) ?? 0) > 0)) {
    return true;
  }

  for (const marker of [data.failed_pr, data.auto_merge_failed, data.failed_automation]) {
    if (typeof marker !== 'string') {
      continue;
    }
    const normalized = marker.trim();
    if (normalized.length > 0 && normalized.toLowerCase() !== 'none') {
      return true;
    }
  }
  return false;
}
/**
 * Maps exit code, script status and failure category to a session outcome.
 *
 * Checked in order: skipped (status `queued` or `skip_*`), timeout,
 * rate-limited, failure (status `failure*`, low review score, or a failure
 * marker), then success for exit code 0 and failure otherwise.
 *
 * NOTE(review): a `timeout` category derived purely from output text yields
 * `timeout` even when the exit code is 0, whereas the rate-limit branch is
 * guarded by `exitCode !== 0` — confirm the asymmetry is intended.
 */
function determineOutcome(
  exitCode: number,
  scriptResult: IScriptResult | null | undefined,
  category: FailureCategory,
): SessionOutcomeStatus {
  const status = scriptResult?.status ?? '';
  if (status === 'queued' || status.startsWith('skip_')) {
    return 'skipped';
  }
  if (exitCode === 124 || status === 'timeout' || category === 'timeout') {
    return 'timeout';
  }
  if (status === 'rate_limited' || (exitCode !== 0 && category === 'rate-limit')) {
    return 'rate_limited';
  }
  if (
    status.startsWith('failure') ||
    category === 'review-score' ||
    markerIndicatesFailure(scriptResult)
  ) {
    return 'failure';
  }
  return exitCode === 0 ? 'success' : 'failure';
}

/** Recursively redacts every string (keys included) of a JSON-shaped value. */
function redactJsonValue(value: unknown): unknown {
  if (typeof value === 'string') {
    return redactOutcomeText(value);
  }
  if (Array.isArray(value)) {
    return value.map((item) => redactJsonValue(item));
  }
  if (value !== null && typeof value === 'object') {
    const redacted: Record<string, unknown> = {};
    for (const [key, entry] of Object.entries(value)) {
      redacted[redactOutcomeText(key)] = redactJsonValue(entry);
    }
    return redacted;
  }
  return value;
}

/**
 * Returns a JSON-normalized copy of `metadata` with secrets masked.
 *
 * The value is round-tripped through JSON first (as before, dropping
 * non-serializable entries), then each decoded string is redacted on its own.
 * Redacting decoded strings rather than the serialized text means a match can
 * never swallow quotes or structural characters and make `JSON.parse` throw,
 * and secrets separated by real whitespace (serialized as `\n`) are still
 * recognised.
 */
function redactMetadata(metadata: Record<string, unknown>): Record<string, unknown> {
  const normalized: unknown = JSON.parse(JSON.stringify(metadata));
  const redacted = redactJsonValue(normalized);
  return redacted !== null && typeof redacted === 'object' && !Array.isArray(redacted)
    ? (redacted as Record<string, unknown>)
    : {};
}

/**
 * Converts the raw result of a job run into the record stored as a session
 * outcome.
 *
 * Failure category and signature are only kept for failure, timeout and
 * rate-limited outcomes. `ciStatus` is forced to `fail` for the `ci` category
 * and otherwise taken from the script data. Metadata is redacted before it is
 * returned.
 */
export function buildSessionOutcomeInput(input: IOutcomeParserInput): ISessionOutcomeInsertInput {
  const scriptData = input.scriptResult?.data;
  const lines = getOutputLines(input.stdout, input.stderr);
  const classification = classifyFailure(input);
  const outcome = determineOutcome(input.exitCode, input.scriptResult, classification.category);
  const attempt = parseAttemptCount(input.scriptResult, lines);
  const retryCount = parseRetryCount(input.scriptResult, attempt);
  const reviewScore = parseOptionalNumber(scriptData?.final_score ?? scriptData?.review_score);
  const failureCategory =
    outcome === 'failure' || outcome === 'timeout' || outcome === 'rate_limited'
      ? classification.category
      : null;

  return {
    projectPath: input.projectPath,
    jobType: input.jobType,
    providerKey: input.providerKey || 'unknown',
    prdFile: scriptData?.prd ?? scriptData?.prd_file ?? null,
    prNumber: parseFirstPrNumber(input.scriptResult),
    branchName: scriptData?.branch ?? null,
    startedAt: input.startedAt,
    finishedAt: input.finishedAt,
    durationSeconds: Math.max(0, Math.round((input.finishedAt - input.startedAt) / 1000)),
    outcome,
    exitCode: input.exitCode,
    attempt,
    retryCount,
    reviewScore,
    ciStatus: failureCategory === 'ci' ? 'fail' : (scriptData?.ci_status ?? null),
    failureCategory,
    failureSignature: failureCategory ? classification.failureSignature : null,
    metadata: redactMetadata({
      ...(input.metadata ?? {}),
      scriptStatus: input.scriptResult?.status ?? null,
      scriptData: scriptData ?? {},
      minReviewScore: input.minReviewScore ?? null,
      firstErrorLine: classification.firstErrorLine,
      fileArea: classification.fileArea,
    }),
  };
}
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** Minimum confidence at which a pattern is marked active. */
const DEFAULT_CONFIDENCE_THRESHOLD = 0.75;
/** Lifetime given to a newly created augmentation. */
const DEFAULT_AUGMENTATION_TTL_MS = 14 * MS_PER_DAY;
/** Active augmentations kept per project and job type. */
const DEFAULT_MAX_ACTIVE_AUGMENTATIONS = 3;
/** Consecutive successes after which active augmentations are expired. */
const DEFAULT_SUCCESS_STREAK_TO_EXPIRE = 3;
/** Age up to which a sighting gets the full recency score. */
const RECENT_WINDOW_MS = 7 * MS_PER_DAY;
/** Age up to which a sighting still gets the reduced recency score. */
const STALE_WINDOW_MS = 14 * MS_PER_DAY;
const MAX_PATTERN_TEXT_LENGTH = 180;
const MAX_SIGNATURE_PROMPT_LENGTH = 90;

/** Optional overrides for analyzeFeedbackOutcome; each falls back to a DEFAULT_* constant. */
export interface IFeedbackPatternAnalysisOptions {
  augmentationTtlMs?: number;
  confidenceThreshold?: number;
  maxActiveAugmentations?: number;
  /** Reference time in epoch milliseconds. */
  now?: number;
  successStreakToExpire?: number;
}

/** Outcome of analysing a single stored session outcome. */
export interface IFeedbackPatternAnalysisResult {
  /** Augmentation that is active for the pattern, if any. */
  augmentation: IPromptAugmentation | null;
  /** Ids of every augmentation expired during this analysis. */
  expiredAugmentationIds: number[];
  /** Pattern that was created or updated, or null when the outcome was not analysed as a failure. */
  pattern: IFeedbackPattern | null;
}

/** Streak counters derived from the most recent outcomes. */
interface IFailureStreakStats {
  failureStreak: number;
  signatureStreak: number;
}

/** True for the outcomes treated as failures: failure, timeout and rate_limited. */
function isFailureOutcome(outcome: ISessionOutcome): boolean {
  switch (outcome.outcome) {
    case 'failure':
    case 'timeout':
    case 'rate_limited':
      return true;
    default:
      return false;
  }
}

/**
 * Collapses whitespace and caps the text at `maxLength`, ending truncated text
 * with "..." (trailing whitespace before the ellipsis is removed).
 */
function truncateText(value: string, maxLength = MAX_PATTERN_TEXT_LENGTH): string {
  const singleSpaced = value.replace(/\s+/g, ' ').trim();
  if (singleSpaced.length > maxLength) {
    return `${singleSpaced.slice(0, maxLength - 3).trimEnd()}...`;
  }
  return singleSpaced;
}

/** Returns `metadata[key]` unchanged when it is a non-blank string, otherwise null. */
function getStringMetadata(metadata: Record<string, unknown>, key: string): string | null {
  const candidate = metadata[key];
  if (typeof candidate !== 'string') {
    return null;
  }
  return candidate.trim().length > 0 ? candidate : null;
}

/** Reads the `fileArea` recorded in the outcome metadata, defaulting to "unknown area". */
function getFileArea(outcome: ISessionOutcome): string {
  return getStringMetadata(outcome.metadata, 'fileArea') ?? 'unknown area';
}
/**
 * Counts the current failure streaks for the outcome's project and job type.
 *
 * Walks up to 25 recent outcomes in the order the repository returns them
 * (presumably newest first — verify against the repository) until the first
 * non-failure.
 *
 * - `failureStreak`: consecutive failures of any kind.
 * - `signatureStreak`: leading consecutive failures that share the outcome's
 *   failure signature.
 *
 * A differing signature only ends the signature streak; the failure streak
 * keeps counting, so it is no longer capped at `signatureStreak + 1`.
 */
function countRecentStreaks(
  repository: ISessionOutcomeRepository,
  outcome: ISessionOutcome,
): IFailureStreakStats {
  const recentOutcomes = repository.queryOutcomes({
    projectPath: outcome.projectPath,
    jobType: outcome.jobType,
    limit: 25,
  });

  let failureStreak = 0;
  let signatureStreak = 0;
  let signatureStreakOpen = true;

  for (const recentOutcome of recentOutcomes) {
    if (!isFailureOutcome(recentOutcome)) {
      break;
    }
    failureStreak += 1;

    if (signatureStreakOpen && recentOutcome.failureSignature === outcome.failureSignature) {
      signatureStreak += 1;
    } else {
      signatureStreakOpen = false;
    }
  }

  return { failureStreak, signatureStreak };
}

/** Counts leading `success` outcomes among up to 25 recent outcomes. */
function countSuccessStreak(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
): number {
  const recentOutcomes = repository.queryOutcomes({ projectPath, jobType, limit: 25 });
  const firstNonSuccess = recentOutcomes.findIndex((outcome) => outcome.outcome !== 'success');
  return firstNonSuccess === -1 ? recentOutcomes.length : firstNonSuccess;
}

/**
 * Scores how recent a sighting is: 1 within RECENT_WINDOW_MS, 0.5 within
 * STALE_WINDOW_MS, 0.15 beyond that. Sightings in the future count as age 0.
 */
function calculateRecencyScore(now: number, lastSeenAt: number): number {
  const ageMs = Math.max(0, now - lastSeenAt);
  if (ageMs <= RECENT_WINDOW_MS) {
    return 1;
  }
  return ageMs <= STALE_WINDOW_MS ? 0.5 : 0.15;
}

/**
 * Combines sample count (45%), streak length (35%) and recency (20%) into a
 * confidence between 0 and 1, rounded to two decimals. Sample count and streak
 * each saturate at 2.
 */
function calculateConfidence(
  sampleCount: number,
  lastSeenAt: number,
  failureStreak: number,
  signatureStreak: number,
  now: number,
): number {
  const sampleScore = Math.min(1, sampleCount / 2);
  const streakScore = Math.min(1, Math.max(failureStreak, signatureStreak) / 2);
  const recencyScore = calculateRecencyScore(now, lastSeenAt);
  const confidence = sampleScore * 0.45 + streakScore * 0.35 + recencyScore * 0.2;
  return Math.round(Math.min(1, confidence) * 100) / 100;
}

/** Builds the pattern title, capped at 120 characters. */
function buildPatternTitle(outcome: ISessionOutcome): string {
  const category = outcome.failureCategory ?? 'unknown';
  return truncateText(`Repeated ${category} failure in ${getFileArea(outcome)}`, 120);
}
/** Describes how often the failure signature has been seen, with correct singular/plural wording. */
function buildPatternDescription(outcome: ISessionOutcome, sampleCount: number): string {
  const times = sampleCount === 1 ? 'time' : 'times';
  return truncateText(
    `Failure signature has appeared ${sampleCount} ${times} for ${outcome.jobType} sessions.`,
  );
}

const FALLBACK_AUGMENTATION_ACTION =
  'Investigate the repeated failure signature before making broad changes.';

/**
 * Guidance sentence per failure category. A Map is used so that only the keys
 * listed here can match; a plain-object lookup would also resolve inherited
 * members such as `constructor`.
 */
const AUGMENTATION_ACTION_BY_CATEGORY: ReadonlyMap<string, string> = new Map([
  ['ci', 'Check failing CI details before broad edits and prioritize the repeated failure area.'],
  [
    'conflict',
    'Check merge conflicts before editing and resolve the repeated conflict area first.',
  ],
  ['eslint', 'Run lint early and fix the repeated ESLint issue before final verification.'],
  [
    'rate-limit',
    'Avoid unnecessary provider calls and continue with local evidence when rate limits appear.',
  ],
  [
    'review-score',
    'Read prior low-score review feedback before declaring the PR ready and address repeated concerns.',
  ],
  [
    'test',
    'Run the targeted test area early and fix the repeated failure before final verification.',
  ],
  ['timeout', 'Keep the work scoped and verify incrementally because prior sessions timed out.'],
  [
    'typescript',
    'Run typecheck early and fix the repeated TypeScript issue before final verification.',
  ],
  ['unknown', FALLBACK_AUGMENTATION_ACTION],
]);

/**
 * Builds the prompt snippet stored for an active pattern: a category-specific
 * instruction, the affected area, and a provenance trailer (pattern id, sample
 * count, confidence, signature). The whole text is capped at 320 characters
 * and the signature at MAX_SIGNATURE_PROMPT_LENGTH.
 *
 * Unrecognised categories use the generic "unknown" instruction.
 */
function buildAugmentationPrompt(pattern: IFeedbackPattern, outcome: ISessionOutcome): string {
  const category = outcome.failureCategory ?? pattern.category;
  const fileArea = getFileArea(outcome);
  const signature = truncateText(
    outcome.failureSignature ?? pattern.patternKey,
    MAX_SIGNATURE_PROMPT_LENGTH,
  );
  const confidencePercent = Math.round(pattern.confidence * 100);
  const action = AUGMENTATION_ACTION_BY_CATEGORY.get(category) ?? FALLBACK_AUGMENTATION_ACTION;

  return truncateText(
    `${action} Area: ${fileArea}. Provenance: pattern #${pattern.id}, samples=${pattern.sampleCount}, confidence=${confidencePercent}%, signature="${signature}".`,
    320,
  );
}
/**
 * Marks active augmentations whose `expiresAt` is at or before `now` as
 * expired.
 *
 * @returns Ids of the augmentations that were expired, in listing order.
 */
function expireStaleAugmentations(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
  now: number,
): number[] {
  const overdue = repository
    .listAugmentations({
      includeExpired: true,
      jobType,
      projectPath,
      status: 'active',
    })
    .filter((augmentation) => augmentation.expiresAt != null && augmentation.expiresAt <= now);

  const expiredIds: number[] = [];
  for (const augmentation of overdue) {
    repository.updateAugmentationStatus(augmentation.id, 'expired', projectPath);
    expiredIds.push(augmentation.id);
  }
  return expiredIds;
}

/**
 * Expires every active augmentation once the job has succeeded
 * `successStreakToExpire` times in a row. A threshold of zero or less disables
 * the rule.
 *
 * @returns Ids of the augmentations that were expired.
 */
function expireAugmentationsAfterSuccessStreak(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
  successStreakToExpire: number,
  now: number,
): number[] {
  if (successStreakToExpire <= 0) {
    return [];
  }
  if (countSuccessStreak(repository, projectPath, jobType) < successStreakToExpire) {
    return [];
  }

  return repository.listActiveAugmentations(projectPath, jobType, now).map((augmentation) => {
    repository.updateAugmentationStatus(augmentation.id, 'expired', projectPath);
    return augmentation.id;
  });
}

/**
 * Keeps at most `maxActiveAugmentations` active augmentations and expires the
 * rest.
 *
 * Survivors are chosen by the confidence of their (active) pattern, then by
 * newest `createdAt`, then by highest id. Augmentations without a pattern, or
 * whose pattern is not among the first 100 active patterns, rank with
 * confidence 0. A cap below 1 expires everything.
 *
 * @returns Ids of the augmentations that were expired.
 */
function enforceAugmentationCap(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
  maxActiveAugmentations: number,
  now: number,
): number[] {
  const active = repository.listActiveAugmentations(projectPath, jobType, now);
  const expire = (augmentation: IPromptAugmentation): number => {
    repository.updateAugmentationStatus(augmentation.id, 'expired', projectPath);
    return augmentation.id;
  };

  if (maxActiveAugmentations < 1) {
    return active.map(expire);
  }
  if (active.length <= maxActiveAugmentations) {
    return [];
  }

  const confidenceByPatternId = new Map(
    repository
      .listPatterns({
        jobType,
        projectPath,
        status: 'active',
        limit: 100,
      })
      .map((pattern) => [pattern.id, pattern.confidence]),
  );
  const confidenceOf = (augmentation: IPromptAugmentation): number =>
    augmentation.patternId == null ? 0 : (confidenceByPatternId.get(augmentation.patternId) ?? 0);

  const ranked = [...active].sort((left, right) => {
    const byConfidence = confidenceOf(right) - confidenceOf(left);
    if (byConfidence !== 0) {
      return byConfidence;
    }
    if (left.createdAt !== right.createdAt) {
      return right.createdAt - left.createdAt;
    }
    return right.id - left.id;
  });
  const keepIds = new Set(
    ranked.slice(0, maxActiveAugmentations).map((augmentation) => augmentation.id),
  );

  return active.filter((augmentation) => !keepIds.has(augmentation.id)).map(expire);
}
/** Returns the active augmentation linked to `patternId`, or null when there is none. */
function findActiveAugmentationForPattern(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
  patternId: number,
  now: number,
): IPromptAugmentation | null {
  const active = repository.listActiveAugmentations(projectPath, jobType, now);
  const linked = active.find((augmentation) => augmentation.patternId === patternId);
  return linked ?? null;
}

/**
 * Updates feedback patterns and prompt augmentations for one stored outcome.
 *
 * Steps, in order:
 * 1. Expire augmentations whose TTL has passed.
 * 2. For an outcome that is not a classified failure, apply the
 *    success-streak expiry rule and stop.
 * 3. Upsert the pattern keyed by the failure signature with a recomputed
 *    sample count, confidence and status.
 * 4. When the pattern is active, reuse its active augmentation or create one.
 * 5. Enforce the cap on active augmentations.
 *
 * `now` defaults to `options.now`, then the outcome's `finishedAt`, then the
 * current time.
 *
 * NOTE(review): the returned `augmentation` is captured before the cap is
 * enforced, so it may be one of the ids in `expiredAugmentationIds` — confirm
 * callers handle that.
 *
 * @returns The affected pattern and augmentation plus every augmentation id
 *   expired along the way.
 */
export function analyzeFeedbackOutcome(
  repository: ISessionOutcomeRepository,
  outcome: ISessionOutcome,
  options: IFeedbackPatternAnalysisOptions = {},
): IFeedbackPatternAnalysisResult {
  const { projectPath, jobType } = outcome;
  const now = options.now ?? outcome.finishedAt ?? Date.now();
  const expiredAugmentationIds = expireStaleAugmentations(repository, projectPath, jobType, now);

  const isClassifiedFailure =
    isFailureOutcome(outcome) && Boolean(outcome.failureSignature) && Boolean(outcome.failureCategory);
  if (!isClassifiedFailure || !outcome.failureSignature || !outcome.failureCategory) {
    const expiredAfterStreak = expireAugmentationsAfterSuccessStreak(
      repository,
      projectPath,
      jobType,
      options.successStreakToExpire ?? DEFAULT_SUCCESS_STREAK_TO_EXPIRE,
      now,
    );
    expiredAugmentationIds.push(...expiredAfterStreak);
    return { augmentation: null, expiredAugmentationIds, pattern: null };
  }

  const knownPatterns = repository.listPatterns({
    jobType,
    projectPath,
    limit: 100,
  });
  const existingPattern =
    knownPatterns.find((candidate) => candidate.patternKey === outcome.failureSignature) ?? null;
  const sampleCount = (existingPattern?.sampleCount ?? 0) + 1;

  const { failureStreak, signatureStreak } = countRecentStreaks(repository, outcome);
  const confidence = calculateConfidence(
    sampleCount,
    outcome.finishedAt,
    failureStreak,
    signatureStreak,
    now,
  );
  const threshold = options.confidenceThreshold ?? DEFAULT_CONFIDENCE_THRESHOLD;
  // A pattern below the threshold keeps whatever status it already had.
  const status = confidence >= threshold ? 'active' : (existingPattern?.status ?? 'observing');

  const pattern = repository.upsertPattern({
    category: outcome.failureCategory,
    confidence,
    description: buildPatternDescription(outcome, sampleCount),
    jobType,
    lastSeenAt: outcome.finishedAt,
    metadata: {
      confidenceInputs: {
        failureStreak,
        recencyScore: calculateRecencyScore(now, outcome.finishedAt),
        sampleCount,
        signatureStreak,
      },
      failureSignature: outcome.failureSignature,
      fileArea: getFileArea(outcome),
      firstErrorLine: getStringMetadata(outcome.metadata, 'firstErrorLine'),
      lastOutcomeId: outcome.id,
    },
    patternKey: outcome.failureSignature,
    projectPath,
    sampleCount,
    status,
    title: buildPatternTitle(outcome),
  });

  let augmentation: IPromptAugmentation | null = null;
  if (pattern.status === 'active') {
    augmentation =
      findActiveAugmentationForPattern(repository, projectPath, jobType, pattern.id, now) ||
      repository.createAugmentation({
        expiresAt: now + (options.augmentationTtlMs ?? DEFAULT_AUGMENTATION_TTL_MS),
        jobType,
        patternId: pattern.id,
        projectPath,
        promptText: buildAugmentationPrompt(pattern, outcome),
        status: 'active',
      });
  }

  const expiredByCap = enforceAugmentationCap(
    repository,
    projectPath,
    jobType,
    options.maxActiveAugmentations ?? DEFAULT_MAX_ACTIVE_AUGMENTATIONS,
    now,
  );
  expiredAugmentationIds.push(...expiredByCap);

  return { augmentation, expiredAugmentationIds, pattern };
}
const DEFAULT_MAX_ACTIVE_AUGMENTATIONS = 3;
const MAX_SNIPPET_LENGTH = 260;
/** Environment values (case-insensitive) that switch the feedback prompt off. */
const DISABLED_VALUES = new Set(['0', 'false', 'no', 'off', 'disabled']);

/** Options shared by the prompt augmentation helpers. */
export interface IPromptAugmenterOptions {
  /** Explicit on/off switch; when omitted the environment decides. */
  feedbackEnabled?: boolean;
  /** When true, selected augmentations have their applied counters incremented. */
  markApplied?: boolean;
  maxActiveAugmentations?: number;
  /** Reference time in epoch milliseconds; defaults to the current time. */
  now?: number;
}

/** Rendered prompt block together with the ids of the augmentations it contains. */
export interface IProjectFeedbackPromptResult {
  augmentationIds: number[];
  promptBlock: string;
}

/** True when the augmentation has status `active` and has not reached its expiry time. */
function isActiveAt(augmentation: IPromptAugmentation, now: number): boolean {
  return (
    augmentation.status === 'active' &&
    (augmentation.expiresAt == null || augmentation.expiresAt > now)
  );
}

/**
 * Normalizes the cap: missing or non-finite values fall back to
 * DEFAULT_MAX_ACTIVE_AUGMENTATIONS; other values are floored and clamped to
 * zero or more.
 */
function normalizeMaxActive(value: number | undefined): number {
  if (value == null || !Number.isFinite(value)) {
    return DEFAULT_MAX_ACTIVE_AUGMENTATIONS;
  }
  return Math.max(0, Math.floor(value));
}

/** Collapses whitespace and caps a snippet at MAX_SNIPPET_LENGTH characters. */
function truncateSnippet(value: string): string {
  const normalized = value.replace(/\s+/g, ' ').trim();
  if (normalized.length <= MAX_SNIPPET_LENGTH) {
    return normalized;
  }
  return `${normalized.slice(0, MAX_SNIPPET_LENGTH - 3).trimEnd()}...`;
}

/**
 * Reads the feedback switch from the environment.
 *
 * `NW_FEEDBACK_ENABLED` takes precedence over `NW_FEEDBACK_PROMPT_ENABLED`.
 * A variable that is unset or blank is skipped, so an empty primary variable
 * no longer hides an explicit value in the secondary one. Feedback is enabled
 * unless the chosen value is one of DISABLED_VALUES.
 */
export function isFeedbackPromptEnabled(): boolean {
  const raw = [process.env.NW_FEEDBACK_ENABLED, process.env.NW_FEEDBACK_PROMPT_ENABLED].find(
    (candidate) => candidate != null && candidate.trim().length > 0,
  );
  if (raw == null) {
    return true;
  }
  return !DISABLED_VALUES.has(raw.trim().toLowerCase());
}

/**
 * Chooses the augmentations to render: those active at `now`, ordered by
 * `createdAt` then id (both ascending), limited to the normalized cap.
 * Returns an empty list when feedback is disabled or the cap is zero. The
 * input array is not modified.
 *
 * NOTE(review): this keeps the oldest entries when the cap is exceeded,
 * whereas the analyzer's cap keeps the highest-confidence, newest ones —
 * confirm which preference is intended.
 */
export function selectPromptAugmentations(
  augmentations: IPromptAugmentation[],
  options: IPromptAugmenterOptions = {},
): IPromptAugmentation[] {
  if (options.feedbackEnabled === false) {
    return [];
  }

  const now = options.now ?? Date.now();
  const maxActive = normalizeMaxActive(options.maxActiveAugmentations);
  if (maxActive === 0) {
    return [];
  }

  return augmentations
    .filter((augmentation) => isActiveAt(augmentation, now))
    .sort((left, right) => {
      if (left.createdAt !== right.createdAt) {
        return left.createdAt - right.createdAt;
      }
      return left.id - right.id;
    })
    .slice(0, maxActive);
}
/**
 * Renders the "Project Feedback" markdown block for the selected
 * augmentations: a heading, a fixed explanatory sentence, a blank line, and
 * one truncated bullet per augmentation.
 *
 * @returns The block, or an empty string when nothing is selected.
 */
export function renderProjectFeedbackBlock(
  augmentations: IPromptAugmentation[],
  options: IPromptAugmenterOptions = {},
): string {
  const selected = selectPromptAugmentations(augmentations, options);
  if (selected.length === 0) {
    return '';
  }

  const bullets = selected.map((augmentation) => `- ${truncateSnippet(augmentation.promptText)}`);
  return [
    '## Project Feedback',
    'The following short notes come from repeated recent Night Watch failures. Treat them as targeted guardrails, not as replacements for the main task instructions.',
    '',
    ...bullets,
  ].join('\n');
}

/**
 * Loads the active augmentations of a project and job type and renders them
 * as a prompt block.
 *
 * When `options.feedbackEnabled` is omitted the environment switch decides.
 * With `markApplied: true` and a non-empty block, each selected augmentation's
 * counters are incremented through the repository.
 *
 * @returns The rendered block and the ids of the augmentations it contains;
 *   both empty when feedback is disabled.
 */
export function buildProjectFeedbackPromptBlock(
  repository: ISessionOutcomeRepository,
  projectPath: string,
  jobType: JobType,
  options: IPromptAugmenterOptions = {},
): IProjectFeedbackPromptResult {
  const feedbackEnabled = options.feedbackEnabled ?? isFeedbackPromptEnabled();
  if (!feedbackEnabled) {
    return { augmentationIds: [], promptBlock: '' };
  }

  const now = options.now ?? Date.now();
  const resolvedOptions: IPromptAugmenterOptions = { ...options, feedbackEnabled, now };
  const selected = selectPromptAugmentations(
    repository.listActiveAugmentations(projectPath, jobType, now),
    resolvedOptions,
  );
  const promptBlock = renderProjectFeedbackBlock(selected, resolvedOptions);

  const shouldMarkApplied = options.markApplied === true && promptBlock.length > 0;
  if (shouldMarkApplied) {
    selected.forEach((augmentation) => {
      repository.incrementAugmentationCounts(augmentation.id);
    });
  }

  return {
    augmentationIds: selected.map((augmentation) => augmentation.id),
    promptBlock,
  };
}
/**
 * Configuration of the feedback loop (stored as `feedback` on the Night Watch
 * config).
 *
 * NOTE(review): the field meanings below are inferred from the identically
 * named analysis options; the code that maps this config onto those options is
 * not visible here — confirm against the caller.
 */
export interface IFeedbackConfig {
  /** Master switch for the feedback loop. */
  enabled: boolean;
  /** Presumably the minimum pattern confidence (0–1) required for activation. */
  confidenceThreshold: number;
  /** Presumably the lifetime of a prompt augmentation, in days. */
  augmentationTtlDays: number;
  /** Presumably the cap on simultaneously active augmentations. */
  maxActiveAugmentations: number;
  /** Presumably the number of consecutive successes that expires active augmentations. */
  successStreakToExpire: number;
}
/**
 * Storage contract for the feedback loop: session outcomes, the failure
 * patterns derived from them, and the prompt augmentations attached to those
 * patterns.
 */
export interface ISessionOutcomeRepository {
  /** Stores one session outcome and returns the stored record. */
  insertOutcome(input: ISessionOutcomeInsertInput): ISessionOutcome;

  /** Returns the outcomes matching the query. */
  queryOutcomes(input: ISessionOutcomeQueryInput): ISessionOutcome[];

  /** Returns aggregate information about the outcomes matching the query. */
  querySummary(input: ISessionOutcomeSummaryInput): ISessionOutcomeSummary;

  /** Creates or updates a feedback pattern and returns the stored record. */
  upsertPattern(input: IFeedbackPatternUpsertInput): IFeedbackPattern;

  /** Returns the feedback patterns matching the query. */
  listPatterns(input: IFeedbackPatternQueryInput): IFeedbackPattern[];

  /** Stores a new prompt augmentation and returns the stored record. */
  createAugmentation(input: IPromptAugmentationInsertInput): IPromptAugmentation;

  /** Returns the prompt augmentations matching the query. */
  listAugmentations(input: IPromptAugmentationQueryInput): IPromptAugmentation[];

  /**
   * Returns the augmentations of a project and job type that are active.
   *
   * @param now Optional reference time in epoch milliseconds.
   */
  listActiveAugmentations(
    projectPath: string,
    jobType: JobType,
    now?: number,
  ): IPromptAugmentation[];

  /**
   * Changes the status of an augmentation.
   *
   * @param projectPath Optional project the augmentation is expected to belong to.
   * @returns An augmentation record or null (presumably null when no matching
   *   augmentation exists — confirm against the implementation).
   */
  updateAugmentationStatus(
    id: number,
    status: PromptAugmentationStatus,
    projectPath?: string,
  ): IPromptAugmentation | null;

  /**
   * Increments the usage counters of an augmentation.
   *
   * @param success Optional flag; presumably also counts the use as a success.
   */
  incrementAugmentationCounts(id: number, success?: boolean): void;
}
+ */ + +import Database from 'better-sqlite3'; +import { inject, injectable } from 'tsyringe'; + +import type { + FeedbackPatternStatus, + IFeedbackPattern, + IFeedbackPatternQueryInput, + IFeedbackPatternUpsertInput, + IPromptAugmentation, + IPromptAugmentationInsertInput, + IPromptAugmentationQueryInput, + ISessionOutcome, + ISessionOutcomeInsertInput, + ISessionOutcomeQueryInput, + ISessionOutcomeSummary, + ISessionOutcomeSummaryInput, + JobType, + PromptAugmentationStatus, + SessionOutcomeStatus, +} from '@/types.js'; + +import { ISessionOutcomeRepository } from '../interfaces.js'; + +interface ISessionOutcomeRow { + id: number; + project_path: string; + job_type: string; + provider_key: string; + prd_file: string | null; + pr_number: number | null; + branch_name: string | null; + started_at: number; + finished_at: number; + duration_seconds: number | null; + outcome: string; + exit_code: number | null; + attempt: number; + retry_count: number; + review_score: number | null; + ci_status: string | null; + failure_category: string | null; + failure_signature: string | null; + metadata_json: string; +} + +interface IFeedbackPatternRow { + id: number; + project_path: string; + pattern_key: string; + job_type: string; + category: string; + title: string; + description: string; + sample_count: number; + confidence: number; + first_seen_at: number; + last_seen_at: number; + status: string; + metadata_json: string; +} + +interface IPromptAugmentationRow { + id: number; + project_path: string; + pattern_id: number | null; + job_type: string; + prompt_text: string; + status: string; + created_at: number; + updated_at: number; + expires_at: number | null; + applied_count: number; + success_count: number; +} + +interface ISummaryCountRow { + key: string | null; + count: number; +} + +const SECRET_PLACEHOLDER = '[REDACTED_SECRET]'; +const SECRET_KEY_PATTERN = + /(?:api[_-]?key|authorization|client[_-]?secret|cookie|password|private[_-]?key|secret|token)/i; +const 
SECRET_TEXT_PATTERNS: Array<[RegExp, string]> = [ + /* PEM private-key blocks, from the BEGIN line through the first following END line */ [ + /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g, + SECRET_PLACEHOLDER, + ], + /* "sk-ant-" prefixed keys; listed ahead of the generic "sk-" rule below */ [/\bsk-ant-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + /* any other "sk-" prefixed run of 20+ word or hyphen characters */ [/\bsk-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + /* gho_ / ghp_ / ghs_ / ghr_ / ghu_ tokens - presumably GitHub tokens; confirm */ [/\bgh[opsru]_\w{30,}\b/g, SECRET_PLACEHOLDER], + /* xoxb- / xoxa- / xoxp- / xoxr- / xoxs- tokens - presumably Slack tokens; confirm */ [/\bxox[baprs]-[\w-]{20,}\b/g, SECRET_PLACEHOLDER], + /* AKIA or ASIA followed by 16 upper-case alphanumerics - presumably AWS access key ids; confirm */ [/\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/g, SECRET_PLACEHOLDER], + /* Bearer / Basic credentials: the scheme word is kept through $1, only the credential part is replaced */ [/\b(Bearer|Basic)\s+[\w.~+/=-]{12,}/gi, `$1 ${SECRET_PLACEHOLDER}`], +]; + +/** Runs value through every SECRET_TEXT_PATTERNS entry, in array order, and returns the text with all matches replaced. */ function redactText(value: string): string { + return SECRET_TEXT_PATTERNS.reduce( + (current, [pattern, replacement]) => current.replace(pattern, replacement), + value, + ); +} + +/** Null-safe wrapper around redactText: null and undefined both become null, any string is redacted. */ function redactOptionalText(value: string | null | undefined): string | null { + return value == null ? null : redactText(value); +} + +/** Recursively builds a redacted copy of a metadata value. A non-empty key matching SECRET_KEY_PATTERN replaces the whole value, whatever its type, with SECRET_PLACEHOLDER; strings go through redactText; null and other primitives are returned unchanged; arrays and objects are rebuilt entry by entry. @param value - value to sanitise @param key - property name the value was found under; undefined for the root and for array items @param seen - objects currently being walked, used for cycle detection @returns the redacted copy, or the string '[Circular]' where an object is reached again from inside itself */ function redactMetadataValue( + value: unknown, + key: string | undefined, + seen: WeakSet, +): unknown { + if (key && SECRET_KEY_PATTERN.test(key)) { + return SECRET_PLACEHOLDER; + } + + if (typeof value === 'string') { + return redactText(value); + } + + if (value === null || typeof value !== 'object') { + return value; + } + + /* seen only holds the ancestors of the current node (each object is removed again below once it has been walked), so an object referenced twice without a cycle is copied twice rather than reported as circular */ if (seen.has(value)) { + return '[Circular]'; + } + seen.add(value); + + /* array items are visited with an undefined key, so only their content, not their position, can trigger redaction */ if (Array.isArray(value)) { + const redactedArray = value.map((item) => redactMetadataValue(item, undefined, seen)); + seen.delete(value); + return redactedArray; + } + + /* NOTE(review): every non-array object is walked with Object.entries, so an instance with no own enumerable properties (a Date, for example) comes out as an empty object - confirm metadata is always plain JSON data */ const redacted: Record = {}; + for (const [entryKey, entryValue] of Object.entries(value)) { + redacted[entryKey] = redactMetadataValue(entryValue, entryKey, seen); + } + seen.delete(value); + return redacted; +} + +/** Returns a redacted copy of a metadata record. Undefined input is treated as an empty object, and {} is returned if the redacted root is not a plain (non-array) object. */ function redactMetadata(metadata: Record | undefined): Record { + const value = redactMetadataValue(metadata ??
{}, undefined, new WeakSet()); + /* only a non-null, non-array object is handed back; any other result collapses to {} below */ if (typeof value === 'object' && value !== null && !Array.isArray(value)) { + return value as Record; + } + return {}; +} + +/** Parses a stored metadata_json string. Returns the parsed value only when it is a non-null, non-array object; invalid JSON and every other JSON type yield {}. Parse errors are caught here and not rethrown. */ function parseMetadata(metadataJson: string): Record { + try { + const parsed: unknown = JSON.parse(metadataJson); + if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) { + return parsed as Record; + } + } catch { + return {}; + } + return {}; +} + +/** Maps a session_outcomes row to the camelCase ISessionOutcome shape. job_type and outcome are cast to their union types without validation; metadata_json is decoded with parseMetadata. */ function rowToOutcome(row: ISessionOutcomeRow): ISessionOutcome { + return { + id: row.id, + projectPath: row.project_path, + jobType: row.job_type as JobType, + providerKey: row.provider_key, + prdFile: row.prd_file, + prNumber: row.pr_number, + branchName: row.branch_name, + startedAt: row.started_at, + finishedAt: row.finished_at, + durationSeconds: row.duration_seconds, + outcome: row.outcome as SessionOutcomeStatus, + exitCode: row.exit_code, + attempt: row.attempt, + retryCount: row.retry_count, + reviewScore: row.review_score, + ciStatus: row.ci_status, + failureCategory: row.failure_category, + failureSignature: row.failure_signature, + metadata: parseMetadata(row.metadata_json), + }; +} + +/** Maps a feedback_patterns row to IFeedbackPattern. job_type and status are cast without validation; metadata_json is decoded with parseMetadata. */ function rowToPattern(row: IFeedbackPatternRow): IFeedbackPattern { + return { + id: row.id, + projectPath: row.project_path, + patternKey: row.pattern_key, + jobType: row.job_type as JobType, + category: row.category, + title: row.title, + description: row.description, + sampleCount: row.sample_count, + confidence: row.confidence, + firstSeenAt: row.first_seen_at, + lastSeenAt: row.last_seen_at, + status: row.status as FeedbackPatternStatus, + metadata: parseMetadata(row.metadata_json), + }; +} + +/** Maps a prompt_augmentations row to IPromptAugmentation. job_type and status are cast without validation. */ function rowToAugmentation(row: IPromptAugmentationRow): IPromptAugmentation { + return { + id: row.id, + projectPath: row.project_path, + patternId: row.pattern_id, + jobType: row.job_type as JobType, + promptText: row.prompt_text, + status: row.status as PromptAugmentationStatus, + createdAt: row.created_at, + updatedAt: row.updated_at, + expiresAt:
row.expires_at, + appliedCount: row.applied_count, + successCount: row.success_count, + }; +} + +/** Builds the WHERE fragment and matching positional parameters shared by the session_outcomes queries. project_path is always constrained; job_type, outcome (present on the query input only) and the inclusive finished_at bounds are added when supplied. Only fixed clause text ends up in where; every caller-supplied value is bound through params. */ function buildOutcomeWhere(input: ISessionOutcomeSummaryInput | ISessionOutcomeQueryInput): { + params: Array; + where: string; +} { + const clauses = ['project_path = ?']; + const params: Array = [input.projectPath]; + + if (input.jobType) { + clauses.push('job_type = ?'); + params.push(input.jobType); + } + /* the summary input type has no outcome field, hence the in-check before reading it */ if ('outcome' in input && input.outcome) { + clauses.push('outcome = ?'); + params.push(input.outcome); + } + /* != null rather than truthiness so that a bound of 0 is still applied */ if (input.fromFinishedAt != null) { + clauses.push('finished_at >= ?'); + params.push(input.fromFinishedAt); + } + if (input.toFinishedAt != null) { + clauses.push('finished_at <= ?'); + params.push(input.toFinishedAt); + } + + return { params, where: clauses.join(' AND ') }; +} + +/** ISessionOutcomeRepository implementation on top of a better-sqlite3 handle, which is injected under the 'Database' token. Covers the session_outcomes, feedback_patterns and prompt_augmentations tables. */ @injectable() +export class SqliteSessionOutcomeRepository implements ISessionOutcomeRepository { + private readonly db: Database.Database; + + constructor(@inject('Database') db: Database.Database) { + this.db = db; + } + + /** Inserts one session outcome and returns it as stored. branchName, ciStatus, failureCategory, failureSignature and metadata are redacted before being written; omitted optional fields are stored as NULL, except attempt (defaults to 1) and retryCount (defaults to 0). */ insertOutcome(input: ISessionOutcomeInsertInput): ISessionOutcome { + const metadataJson = JSON.stringify(redactMetadata(input.metadata)); + const result = this.db + .prepare( + `INSERT INTO session_outcomes + (project_path, job_type, provider_key, prd_file, pr_number, branch_name, + started_at, finished_at, duration_seconds, outcome, exit_code, attempt, + retry_count, review_score, ci_status, failure_category, failure_signature, + metadata_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run( + input.projectPath, + input.jobType, + input.providerKey, + input.prdFile ?? null, + input.prNumber ?? null, + redactOptionalText(input.branchName), + input.startedAt, + input.finishedAt, + input.durationSeconds ?? null, + input.outcome, + input.exitCode ?? null, + input.attempt ?? 1, + input.retryCount ?? 0, + input.reviewScore ??
null, + redactOptionalText(input.ciStatus), + redactOptionalText(input.failureCategory), + redactOptionalText(input.failureSignature), + metadataJson, + ); + + /* re-read the row just inserted so the caller receives the values as stored (redacted text, parsed metadata); the non-null assertion relies on that row existing */ return this.getOutcomeById(Number(result.lastInsertRowid))!; + } + + /** Lists outcomes matching the filter, newest finished_at first with id as tiebreak. limit defaults to 100 and is clamped to the range 1..500. */ queryOutcomes(input: ISessionOutcomeQueryInput): ISessionOutcome[] { + const { params, where } = buildOutcomeWhere(input); + const limit = Math.min(Math.max(input.limit ?? 100, 1), 500); + const rows = this.db + .prepare( + `SELECT * + FROM session_outcomes + WHERE ${where} + ORDER BY finished_at DESC, id DESC + LIMIT ?`, + ) + .all(...params, limit) as ISessionOutcomeRow[]; + + return rows.map(rowToOutcome); + } + + /** Aggregates the outcomes matching the filter: row counts per outcome, row counts per non-null failure_category, and the average of the non-null duration_seconds values. Issues three separate statements that reuse the same WHERE fragment and parameters. */ querySummary(input: ISessionOutcomeSummaryInput): ISessionOutcomeSummary { + const { params, where } = buildOutcomeWhere(input); + const outcomeRows = this.db + .prepare( + `SELECT outcome as key, COUNT(*) as count + FROM session_outcomes + WHERE ${where} + GROUP BY outcome`, + ) + .all(...params) as ISummaryCountRow[]; + + const categoryRows = this.db + .prepare( + `SELECT failure_category as key, COUNT(*) as count + FROM session_outcomes + WHERE ${where} AND failure_category IS NOT NULL + GROUP BY failure_category`, + ) + .all(...params) as ISummaryCountRow[]; + + const averageRow = this.db + .prepare( + `SELECT AVG(duration_seconds) as average_duration + FROM session_outcomes + WHERE ${where} AND duration_seconds IS NOT NULL`, + ) + .get(...params) as { average_duration: number | null } | undefined; + + /* a NULL group key is reported under the name 'unknown' */ const byOutcome = Object.fromEntries( + outcomeRows.map((row) => [row.key ?? 'unknown', row.count]), + ) as Record; + const byFailureCategory = Object.fromEntries( + categoryRows.map((row) => [row.key ?? 'unknown', row.count]), + ) as Record; + + return { + /* total is the sum of the per-outcome groups, so it covers every matching row */ totalCount: outcomeRows.reduce((total, row) => total + row.count, 0), + successCount: byOutcome.success ?? 0, + failureCount: byOutcome.failure ?? 0, + timeoutCount: byOutcome.timeout ?? 0, + rateLimitedCount: byOutcome.rate_limited ??
0, + skippedCount: byOutcome.skipped ?? 0, + averageDurationSeconds: averageRow?.average_duration ?? null, + byOutcome, + byFailureCategory, + }; + } + + /** Inserts or updates the pattern identified by (projectPath, patternKey, jobType) and returns the stored row. Fallbacks when the input omits a field: lastSeenAt is the current time; sampleCount is the existing count plus one, or 1 for a new pattern; confidence, status and metadata fall back to the existing row and then to 0, 'observing' and an empty object. category, title and description are always overwritten with the redacted input. */ upsertPattern(input: IFeedbackPatternUpsertInput): IFeedbackPattern { + const now = Date.now(); + const existing = this.getPattern(input.projectPath, input.patternKey, input.jobType); + /* an existing firstSeenAt wins over the one in the input */ const firstSeenAt = existing?.firstSeenAt ?? input.firstSeenAt ?? now; + const lastSeenAt = input.lastSeenAt ?? now; + const sampleCount = input.sampleCount ?? (existing ? existing.sampleCount + 1 : 1); + const confidence = input.confidence ?? existing?.confidence ?? 0; + const status = input.status ?? existing?.status ?? 'observing'; + const metadataJson = JSON.stringify(redactMetadata(input.metadata ?? existing?.metadata)); + + /* single-statement upsert keyed on (project_path, pattern_key, job_type); first_seen_at is absent from the DO UPDATE list, so it is only ever written on insert */ this.db + .prepare( + `INSERT INTO feedback_patterns + (project_path, pattern_key, job_type, category, title, description, sample_count, + confidence, first_seen_at, last_seen_at, status, metadata_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ ON CONFLICT(project_path, pattern_key, job_type) + DO UPDATE SET category = excluded.category, + title = excluded.title, + description = excluded.description, + sample_count = excluded.sample_count, + confidence = excluded.confidence, + last_seen_at = excluded.last_seen_at, + status = excluded.status, + metadata_json = excluded.metadata_json`, + ) + .run( + input.projectPath, + input.patternKey, + input.jobType, + redactText(input.category), + redactText(input.title), + redactText(input.description), + sampleCount, + confidence, + firstSeenAt, + lastSeenAt, + status, + metadataJson, + ); + + /* re-read so the caller gets the row as stored; the non-null assertion relies on the upsert above having written it */ return this.getPattern(input.projectPath, input.patternKey, input.jobType)!; + } + + /** Lists patterns for a project, optionally filtered by jobType and status, ordered by sample_count, confidence, last_seen_at and id, all descending. limit defaults to 25 and is clamped to the range 1..100. */ listPatterns(input: IFeedbackPatternQueryInput): IFeedbackPattern[] { + const clauses = ['project_path = ?']; + const params: Array = [input.projectPath]; + + if (input.jobType) { + clauses.push('job_type = ?'); + params.push(input.jobType); + } + if (input.status) { + clauses.push('status = ?'); + params.push(input.status); + } + + const limit = Math.min(Math.max(input.limit ?? 25, 1), 100); + const rows = this.db + .prepare( + `SELECT * + FROM feedback_patterns + WHERE ${clauses.join(' AND ')} + ORDER BY sample_count DESC, confidence DESC, last_seen_at DESC, id DESC + LIMIT ?`, + ) + .all(...params, limit) as IFeedbackPatternRow[]; + + return rows.map(rowToPattern); + } + + /** Inserts a prompt augmentation, with promptText redacted, and returns the stored row. Defaults: status 'active', createdAt the current time, updatedAt equal to createdAt, patternId and expiresAt NULL. applied_count and success_count are not part of the insert. */ createAugmentation(input: IPromptAugmentationInsertInput): IPromptAugmentation { + const now = Date.now(); + const createdAt = input.createdAt ?? now; + const updatedAt = input.updatedAt ?? createdAt; + const result = this.db + .prepare( + `INSERT INTO prompt_augmentations + (project_path, pattern_id, job_type, prompt_text, status, created_at, updated_at, expires_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run( + input.projectPath, + input.patternId ?? null, + input.jobType, + redactText(input.promptText), + input.status ?? 'active', + createdAt, + updatedAt, + input.expiresAt ??
null, + ); + + /* re-read the inserted row so that column defaults are reflected in the returned object */ return this.getAugmentationById(Number(result.lastInsertRowid))!; + } + + /** Lists augmentations for a project, oldest created_at first with id as tiebreak, optionally filtered by jobType and status. Unless includeExpired is set, rows whose expires_at is at or before the reference time (input.now, defaulting to the current time) are left out; rows without expires_at never expire. limit defaults to 100 and is clamped to the range 1..250. */ listAugmentations(input: IPromptAugmentationQueryInput): IPromptAugmentation[] { + const clauses = ['project_path = ?']; + const params: Array = [input.projectPath]; + + if (input.jobType) { + clauses.push('job_type = ?'); + params.push(input.jobType); + } + if (input.status) { + clauses.push('status = ?'); + params.push(input.status); + } + if (!input.includeExpired) { + clauses.push('(expires_at IS NULL OR expires_at > ?)'); + params.push(input.now ?? Date.now()); + } + + const limit = Math.min(Math.max(input.limit ?? 100, 1), 250); + const rows = this.db + .prepare( + `SELECT * + FROM prompt_augmentations + WHERE ${clauses.join(' AND ')} + ORDER BY created_at ASC, id ASC + LIMIT ?`, + ) + .all(...params, limit) as IPromptAugmentationRow[]; + + return rows.map(rowToAugmentation); + } + + /** Returns every augmentation for the project and job type whose status is 'active' and which has not expired relative to now, oldest first. Unlike listAugmentations, no LIMIT is applied. */ listActiveAugmentations( + projectPath: string, + jobType: JobType, + now = Date.now(), + ): IPromptAugmentation[] { + const rows = this.db + .prepare( + `SELECT * + FROM prompt_augmentations + WHERE project_path = ? + AND job_type = ? + AND status = 'active' + AND (expires_at IS NULL OR expires_at > ?) + ORDER BY created_at ASC, id ASC`, + ) + .all(projectPath, jobType, now) as IPromptAugmentationRow[]; + + return rows.map(rowToAugmentation); + } + + /** Sets the status of an augmentation and bumps updated_at to the current time. When projectPath is given, the update only matches a row that belongs to that project; when it is omitted, the row is matched by id alone. @returns the updated augmentation, or null when no row matched */ updateAugmentationStatus( + id: number, + status: PromptAugmentationStatus, + projectPath?: string, + ): IPromptAugmentation | null { + const result = + projectPath === undefined + ? this.db + .prepare('UPDATE prompt_augmentations SET status = ?, updated_at = ? WHERE id = ?') + .run(status, Date.now(), id) + : this.db + .prepare( + `UPDATE prompt_augmentations + SET status = ?, updated_at = ? + WHERE id = ? AND project_path = ?`, + ) + .run(status, Date.now(), id, projectPath); + + /* changes is 0 when the id is unknown or belongs to another project */ return result.changes > 0 ?
this.getAugmentationById(id) : null; + } + + /** Adds 1 to applied_count, adds 1 to success_count only when success is true, and bumps updated_at to the current time. The row is matched by id alone and the number of changed rows is not checked, so an unknown id is a silent no-op. */ incrementAugmentationCounts(id: number, success = false): void { + this.db + .prepare( + `UPDATE prompt_augmentations + SET applied_count = applied_count + 1, + success_count = success_count + ?, + updated_at = ? + WHERE id = ?`, + ) + .run(success ? 1 : 0, Date.now(), id); + } + + /** Loads a single outcome by primary key; null when no such row exists. */ private getOutcomeById(id: number): ISessionOutcome | null { + const row = this.db.prepare('SELECT * FROM session_outcomes WHERE id = ?').get(id) as + | ISessionOutcomeRow + | undefined; + return row ? rowToOutcome(row) : null; + } + + /** Loads the pattern stored under (projectPath, patternKey, jobType), the same column triple that upsertPattern uses as its conflict target; null when absent. */ private getPattern( + projectPath: string, + patternKey: string, + jobType: JobType, + ): IFeedbackPattern | null { + const row = this.db + .prepare( + `SELECT * + FROM feedback_patterns + WHERE project_path = ? AND pattern_key = ? AND job_type = ?`, + ) + .get(projectPath, patternKey, jobType) as IFeedbackPatternRow | undefined; + return row ? rowToPattern(row) : null; + } + + /** Loads a single augmentation by primary key, without any project scoping; null when no such row exists. */ private getAugmentationById(id: number): IPromptAugmentation | null { + const row = this.db.prepare('SELECT * FROM prompt_augmentations WHERE id = ?').get(id) as + | IPromptAugmentationRow + | undefined; + return row ?
rowToAugmentation(row) : null; + } +} diff --git a/packages/core/src/storage/sqlite/migrations.ts b/packages/core/src/storage/sqlite/migrations.ts index e61ce954..500f943d 100644 --- a/packages/core/src/storage/sqlite/migrations.ts +++ b/packages/core/src/storage/sqlite/migrations.ts @@ -123,6 +123,65 @@ export function runMigrations(db: Database.Database): void { ); CREATE INDEX IF NOT EXISTS idx_job_runs_lookup ON job_runs(project_path, started_at DESC, job_type, provider_key); + + CREATE TABLE IF NOT EXISTS session_outcomes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_path TEXT NOT NULL, + job_type TEXT NOT NULL, + provider_key TEXT NOT NULL, + prd_file TEXT, + pr_number INTEGER, + branch_name TEXT, + started_at INTEGER NOT NULL, + finished_at INTEGER NOT NULL, + duration_seconds INTEGER, + outcome TEXT NOT NULL, + exit_code INTEGER, + attempt INTEGER NOT NULL DEFAULT 1, + retry_count INTEGER NOT NULL DEFAULT 0, + review_score INTEGER, + ci_status TEXT, + failure_category TEXT, + failure_signature TEXT, + metadata_json TEXT NOT NULL DEFAULT '{}' + ); + CREATE INDEX IF NOT EXISTS idx_session_outcomes_lookup + ON session_outcomes(project_path, finished_at DESC, job_type, outcome); + + CREATE TABLE IF NOT EXISTS feedback_patterns ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_path TEXT NOT NULL, + pattern_key TEXT NOT NULL, + job_type TEXT NOT NULL, + category TEXT NOT NULL, + title TEXT NOT NULL, + description TEXT NOT NULL, + sample_count INTEGER NOT NULL DEFAULT 0, + confidence REAL NOT NULL DEFAULT 0, + first_seen_at INTEGER NOT NULL, + last_seen_at INTEGER NOT NULL, + status TEXT NOT NULL DEFAULT 'observing', + metadata_json TEXT NOT NULL DEFAULT '{}', + UNIQUE(project_path, pattern_key, job_type) + ); + CREATE INDEX IF NOT EXISTS idx_feedback_patterns_lookup + ON feedback_patterns(project_path, job_type, status, confidence DESC); + + CREATE TABLE IF NOT EXISTS prompt_augmentations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_path TEXT NOT 
NULL, + pattern_id INTEGER REFERENCES feedback_patterns(id), + job_type TEXT NOT NULL, + prompt_text TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'active', + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + expires_at INTEGER, + applied_count INTEGER NOT NULL DEFAULT 0, + success_count INTEGER NOT NULL DEFAULT 0 + ); + CREATE INDEX IF NOT EXISTS idx_prompt_augmentations_active + ON prompt_augmentations(project_path, job_type, status, expires_at); `); // Phase 2 cleanup: drop slack_discussions table (multi-agent deliberation removed) diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 2e34859e..d3cdaed4 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -319,6 +319,9 @@ export interface INightWatchConfig { /** Analytics job configuration (Amplitude integration) */ analytics: IAnalyticsConfig; + /** Self-improving feedback loop configuration */ + feedback: IFeedbackConfig; + /** PR conflict resolver configuration */ prResolver: IPrResolverConfig; @@ -400,6 +403,19 @@ export interface IAnalyticsConfig { analysisPrompt: string; } +export interface IFeedbackConfig { + /** Whether structured feedback analysis and prompt augmentation are enabled */ + enabled: boolean; + /** Minimum confidence required before a pattern activates */ + confidenceThreshold: number; + /** Number of days before active prompt augmentations expire */ + augmentationTtlDays: number; + /** Maximum active prompt augmentation snippets per job type */ + maxActiveAugmentations: number; + /** Consecutive successful runs before active augmentations expire */ + successStreakToExpire: number; +} + export interface IPrResolverConfig { /** Whether the PR resolver is enabled */ enabled: boolean; @@ -567,6 +583,158 @@ export type JobRunStatus = | 'rate_limited' | 'skipped'; +export type SessionOutcomeStatus = 'success' | 'failure' | 'timeout' | 'rate_limited' | 'skipped'; + +export type FeedbackPatternStatus = 'observing' | 'active' | 'dismissed' | 
'resolved'; + +/** Status values a prompt augmentation can carry. The SQLite repository stores 'active' when no status is given, and listActiveAugmentations returns 'active' rows only. */ export type PromptAugmentationStatus = 'active' | 'paused' | 'expired' | 'archived'; + +/** + * Structured outcome for a completed Night Watch job session. + */ +export interface ISessionOutcome { + id: number; + projectPath: string; + jobType: JobType; + providerKey: string; + prdFile: string | null; + prNumber: number | null; + branchName: string | null; + /* numeric timestamps; the test suites pass Date.now() based values - presumably epoch milliseconds, confirm */ startedAt: number; + finishedAt: number; + durationSeconds: number | null; + outcome: SessionOutcomeStatus; + exitCode: number | null; + attempt: number; + retryCount: number; + reviewScore: number | null; + ciStatus: string | null; + failureCategory: string | null; + failureSignature: string | null; + metadata: Record; +} + +/** Fields accepted when recording a new outcome. Optional fields may be omitted or null; the SQLite repository stores omitted ones as NULL, except attempt (defaults to 1), retryCount (defaults to 0) and metadata (defaults to an empty object). */ export interface ISessionOutcomeInsertInput { + projectPath: string; + jobType: JobType; + providerKey: string; + prdFile?: string | null; + prNumber?: number | null; + branchName?: string | null; + startedAt: number; + finishedAt: number; + durationSeconds?: number | null; + outcome: SessionOutcomeStatus; + exitCode?: number | null; + attempt?: number; + retryCount?: number; + reviewScore?: number | null; + ciStatus?: string | null; + failureCategory?: string | null; + failureSignature?: string | null; + metadata?: Record; +} + +/** Filter for listing outcomes of one project. In the SQLite repository both finishedAt bounds are inclusive and limit defaults to 100, clamped to 1..500. */ export interface ISessionOutcomeQueryInput { + projectPath: string; + jobType?: JobType; + outcome?: SessionOutcomeStatus; + fromFinishedAt?: number; + toFinishedAt?: number; + limit?: number; +} + +/** Filter for aggregating outcomes of one project; same fields as the query input minus outcome and limit. */ export interface ISessionOutcomeSummaryInput { + projectPath: string; + jobType?: JobType; + fromFinishedAt?: number; + toFinishedAt?: number; +} + +/** Aggregated counts for a set of outcomes. In the SQLite repository totalCount is the sum over byOutcome, byFailureCategory only counts rows with a failure category, and averageDurationSeconds is null when no matching row has a duration. */ export interface ISessionOutcomeSummary { + totalCount: number; + successCount: number; + failureCount: number; + timeoutCount: number; + rateLimitedCount: number; + skippedCount: number; + averageDurationSeconds: number | null; + byOutcome: Record; + byFailureCategory: Record; +} + +/** Filter for listing feedback patterns of one project. */ export interface IFeedbackPatternQueryInput { + projectPath: string; + jobType?: JobType; + status?: FeedbackPatternStatus; +
limit?: number; +} + +export interface IFeedbackPattern { + id: number; + projectPath: string; + patternKey: string; + jobType: JobType; + category: string; + title: string; + description: string; + sampleCount: number; + confidence: number; + firstSeenAt: number; + lastSeenAt: number; + status: FeedbackPatternStatus; + metadata: Record; +} + +export interface IFeedbackPatternUpsertInput { + projectPath: string; + patternKey: string; + jobType: JobType; + category: string; + title: string; + description: string; + sampleCount?: number; + confidence?: number; + firstSeenAt?: number; + lastSeenAt?: number; + status?: FeedbackPatternStatus; + metadata?: Record; +} + +export interface IPromptAugmentation { + id: number; + projectPath: string; + patternId: number | null; + jobType: JobType; + promptText: string; + status: PromptAugmentationStatus; + createdAt: number; + updatedAt: number; + expiresAt: number | null; + appliedCount: number; + successCount: number; +} + +export interface IPromptAugmentationInsertInput { + projectPath: string; + patternId?: number | null; + jobType: JobType; + promptText: string; + status?: PromptAugmentationStatus; + createdAt?: number; + updatedAt?: number; + expiresAt?: number | null; +} + +export interface IPromptAugmentationQueryInput { + projectPath: string; + jobType?: JobType; + status?: PromptAugmentationStatus; + includeExpired?: boolean; + now?: number; + limit?: number; +} + /** * A record of a single job execution stored in the job_runs table */ diff --git a/packages/server/src/__tests__/server/feedback-validation.test.ts b/packages/server/src/__tests__/server/feedback-validation.test.ts new file mode 100644 index 00000000..3e3beb57 --- /dev/null +++ b/packages/server/src/__tests__/server/feedback-validation.test.ts @@ -0,0 +1,191 @@ +/** + * Additional QA coverage for feedback API validation and aggregation behavior. 
+ */ + +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; + +import request from 'supertest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { closeDb, getRepositories, resetRepositories } from '@night-watch/core'; +import { createApp } from '../../index.js'; + +vi.mock('child_process', () => ({ + exec: vi.fn( + ( + _cmd: string, + _opts: unknown, + cb?: (err: Error | null, result: { stdout: string; stderr: string }) => void, + ) => { + const callback = typeof _opts === 'function' ? (_opts as typeof cb) : cb; + callback?.(null, { stdout: '', stderr: '' }); + }, + ), + execFile: vi.fn(), + execSync: vi.fn(() => ''), + spawn: vi.fn(), +})); + +vi.mock('@night-watch/core/board/factory.js', () => ({ + createBoardProvider: vi.fn(() => ({ + closeIssue: vi.fn(), + commentOnIssue: vi.fn(), + createIssue: vi.fn(), + getAllIssues: vi.fn(), + getBoard: vi.fn(), + getColumns: vi.fn(), + getIssue: vi.fn(), + getIssuesByColumn: vi.fn(), + moveIssue: vi.fn(), + setupBoard: vi.fn(), + })), +})); + +vi.mock('@night-watch/core/utils/crontab.js', () => ({ + generateMarker: vi.fn((name: string) => `# night-watch-cli: ${name}`), + getEntries: vi.fn(() => []), + getProjectEntries: vi.fn(() => []), +})); + +function writeMinimalConfig(dir: string): void { + fs.writeFileSync(path.join(dir, 'package.json'), JSON.stringify({ name: 'test-project' })); + fs.writeFileSync( + path.join(dir, 'night-watch.config.json'), + JSON.stringify({ + defaultBranch: 'main', + projectName: 'test-project', + provider: 'claude', + reviewerEnabled: true, + }), + ); + fs.mkdirSync(path.join(dir, 'docs', 'PRDs', 'night-watch', 'done'), { recursive: true }); +} + +describe('feedback API validation', () => { + let app: ReturnType; + let tempDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + closeDb(); + resetRepositories(); + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nw-feedback-validation-test-')); + 
process.env.NIGHT_WATCH_HOME = tempDir; + writeMinimalConfig(tempDir); + app = createApp(tempDir); + }); + + afterEach(() => { + closeDb(); + resetRepositories(); + delete process.env.NIGHT_WATCH_HOME; + fs.rmSync(tempDir, { recursive: true, force: true }); + vi.restoreAllMocks(); + }); + + it('should reject invalid augmentation update requests', async () => { + const invalidId = await request(app) + .patch('/api/feedback/augmentations/not-a-number') + .send({ action: 'disable' }); + expect(invalidId.status).toBe(400); + expect(invalidId.body.error).toBe('Invalid augmentation id'); + + const invalidBody = await request(app).patch('/api/feedback/augmentations/1').send({}); + expect(invalidBody.status).toBe(400); + expect(invalidBody.body.error).toBe('Expected action, enabled, or status update'); + }); + + it('should not update augmentations from another project', async () => { + const repo = getRepositories().sessionOutcomes; + const otherProjectAugmentation = repo.createAugmentation({ + projectPath: `${tempDir}-other-project`, + jobType: 'executor', + promptText: 'Do not leak across project scopes.', + status: 'active', + }); + + const response = await request(app) + .patch(`/api/feedback/augmentations/${otherProjectAugmentation.id}`) + .send({ action: 'disable' }); + + expect(response.status).toBe(404); + expect(response.body.error).toBe('Augmentation not found'); + expect(repo.listAugmentations({ projectPath: `${tempDir}-other-project` })[0].status).toBe( + 'active', + ); + }); + + it('should return stored patterns with aggregated top failure signatures', async () => { + const repo = getRepositories().sessionOutcomes; + const now = Date.now(); + + repo.upsertPattern({ + projectPath: tempDir, + patternKey: 'executor:tests', + jobType: 'executor', + category: 'tests', + title: 'Repeated test failures', + description: 'Executor runs repeatedly fail in vitest.', + sampleCount: 4, + confidence: 0.9, + status: 'active', + firstSeenAt: now - 10_000, + lastSeenAt: now, + 
}); + repo.upsertPattern({ + projectPath: tempDir, + patternKey: 'reviewer:lint', + jobType: 'reviewer', + category: 'lint', + title: 'Lint regressions', + description: 'Reviewer fixes repeatedly trigger lint failures.', + sampleCount: 2, + confidence: 0.75, + status: 'observing', + firstSeenAt: now - 20_000, + lastSeenAt: now - 1_000, + }); + + for (let i = 0; i < 3; i += 1) { + repo.insertOutcome({ + projectPath: tempDir, + jobType: 'executor', + providerKey: 'codex', + startedAt: now - 30_000 + i, + finishedAt: now - 20_000 + i, + durationSeconds: 10, + outcome: 'failure', + failureCategory: 'tests', + failureSignature: 'vitest failed', + }); + } + repo.insertOutcome({ + projectPath: tempDir, + jobType: 'reviewer', + providerKey: 'claude', + startedAt: now - 15_000, + finishedAt: now - 10_000, + durationSeconds: 5, + outcome: 'failure', + failureCategory: 'lint', + failureSignature: 'eslint failed', + }); + + const response = await request(app).get('/api/feedback/patterns'); + + expect(response.status).toBe(200); + expect(response.body.patterns.map((pattern: { title: string }) => pattern.title)).toEqual([ + 'Repeated test failures', + 'Lint regressions', + ]); + expect(response.body.topFailurePatterns[0]).toMatchObject({ + jobType: 'executor', + providerKey: 'codex', + category: 'tests', + signature: 'vitest failed', + sampleCount: 3, + }); + }); +}); diff --git a/packages/server/src/__tests__/server/feedback.test.ts b/packages/server/src/__tests__/server/feedback.test.ts new file mode 100644 index 00000000..a1c70aeb --- /dev/null +++ b/packages/server/src/__tests__/server/feedback.test.ts @@ -0,0 +1,169 @@ +/** + * Tests for feedback dashboard API routes. 
+ */ + +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; + +import request from 'supertest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { closeDb, getRepositories, resetRepositories } from '@night-watch/core'; +import { createApp } from '../../index.js'; + +vi.mock('child_process', () => ({ + exec: vi.fn( + ( + _cmd: string, + _opts: unknown, + cb?: (err: Error | null, result: { stdout: string; stderr: string }) => void, + ) => { + const callback = typeof _opts === 'function' ? (_opts as typeof cb) : cb; + callback?.(null, { stdout: '', stderr: '' }); + }, + ), + execFile: vi.fn(), + execSync: vi.fn(() => ''), + spawn: vi.fn(), +})); + +vi.mock('@night-watch/core/board/factory.js', () => ({ + createBoardProvider: vi.fn(() => ({ + closeIssue: vi.fn(), + commentOnIssue: vi.fn(), + createIssue: vi.fn(), + getAllIssues: vi.fn(), + getBoard: vi.fn(), + getColumns: vi.fn(), + getIssue: vi.fn(), + getIssuesByColumn: vi.fn(), + moveIssue: vi.fn(), + setupBoard: vi.fn(), + })), +})); + +vi.mock('@night-watch/core/utils/crontab.js', () => ({ + generateMarker: vi.fn((name: string) => `# night-watch-cli: ${name}`), + getEntries: vi.fn(() => []), + getProjectEntries: vi.fn(() => []), +})); + +function writeMinimalConfig(dir: string): void { + fs.writeFileSync(path.join(dir, 'package.json'), JSON.stringify({ name: 'test-project' })); + fs.writeFileSync( + path.join(dir, 'night-watch.config.json'), + JSON.stringify({ + defaultBranch: 'main', + projectName: 'test-project', + provider: 'claude', + reviewerEnabled: true, + }), + ); + fs.mkdirSync(path.join(dir, 'docs', 'PRDs', 'night-watch', 'done'), { recursive: true }); +} + +describe('feedback API routes', () => { + let app: ReturnType; + let tempDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + closeDb(); + resetRepositories(); + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'nw-feedback-routes-test-')); + process.env.NIGHT_WATCH_HOME = 
tempDir; + writeMinimalConfig(tempDir); + app = createApp(tempDir); + }); + + afterEach(() => { + closeDb(); + resetRepositories(); + delete process.env.NIGHT_WATCH_HOME; + fs.rmSync(tempDir, { recursive: true, force: true }); + vi.restoreAllMocks(); + }); + + it('should return feedback summary', async () => { + const repo = getRepositories().sessionOutcomes; + const now = Date.now(); + + repo.insertOutcome({ + projectPath: tempDir, + jobType: 'executor', + providerKey: 'codex', + startedAt: now - 30_000, + finishedAt: now - 10_000, + durationSeconds: 20, + outcome: 'success', + }); + repo.insertOutcome({ + projectPath: tempDir, + jobType: 'reviewer', + providerKey: 'claude', + startedAt: now - 70_000, + finishedAt: now - 40_000, + durationSeconds: 30, + outcome: 'failure', + failureCategory: 'tests', + failureSignature: 'vitest failed', + }); + repo.insertOutcome({ + projectPath: tempDir, + jobType: 'executor', + providerKey: 'codex', + startedAt: now - 10 * 24 * 60 * 60 * 1000, + finishedAt: now - 10 * 24 * 60 * 60 * 1000 + 20_000, + durationSeconds: 20, + outcome: 'failure', + failureCategory: 'lint', + failureSignature: 'eslint failed', + }); + repo.createAugmentation({ + projectPath: tempDir, + jobType: 'reviewer', + promptText: 'Check for repeated test failures before editing.', + status: 'active', + }); + + const response = await request(app).get('/api/feedback/summary'); + + expect(response.status).toBe(200); + expect(response.body.projectPath).toBe(tempDir); + expect(response.body.windows.last7Days.totalCount).toBe(2); + expect(response.body.windows.last7Days.successCount).toBe(1); + expect(response.body.windows.last7Days.failureCount).toBe(1); + expect(response.body.windows.last7Days.successRate).toBe(0.5); + expect(response.body.windows.last7Days.byJobType.executor.totalCount).toBe(1); + expect(response.body.windows.last7Days.byProvider.codex.successCount).toBe(1); + expect(response.body.windows.last30Days.totalCount).toBe(3); + 
expect(response.body.activeAugmentations).toHaveLength(1); + }); + + it('should disable augmentation', async () => { + const repo = getRepositories().sessionOutcomes; + const augmentation = repo.createAugmentation({ + projectPath: tempDir, + jobType: 'executor', + promptText: 'Prefer the known fix for flaky tests.', + status: 'active', + }); + + const response = await request(app) + .patch(`/api/feedback/augmentations/${augmentation.id}`) + .send({ enabled: false }); + + expect(response.status).toBe(200); + expect(response.body.augmentation.id).toBe(augmentation.id); + expect(response.body.augmentation.status).toBe('paused'); + + const summary = await request(app).get('/api/feedback/summary'); + expect(summary.status).toBe(200); + expect(summary.body.windows.last7Days.totalCount).toBe(0); + expect(summary.body.windows.last7Days.successRate).toBeNull(); + expect(summary.body.windows.last7Days.byJobType).toEqual({}); + expect(summary.body.windows.last7Days.byProvider).toEqual({}); + expect(summary.body.activeAugmentations).toHaveLength(0); + }); +}); diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index de5f4e6f..ae4591b4 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -36,6 +36,7 @@ import { createProjectConfigRoutes, } from './routes/config.routes.js'; import { createDoctorRoutes, createProjectDoctorRoutes } from './routes/doctor.routes.js'; +import { createFeedbackRoutes, createProjectFeedbackRoutes } from './routes/feedback.routes.js'; import { createJobRoutes, createProjectJobRoutes } from './routes/job.routes.js'; import { createLogRoutes, createProjectLogRoutes } from './routes/log.routes.js'; import { createPrdRoutes, createProjectPrdRoutes } from './routes/prd.routes.js'; @@ -139,6 +140,7 @@ export function createApp(projectDir: string): Express { app.use('/api/logs', createLogRoutes({ projectDir })); app.use('/api/doctor', createDoctorRoutes({ projectDir, getConfig: () => config })); 
app.use('/api/queue', createQueueRoutes({ getConfig: () => config })); + app.use('/api/feedback', createFeedbackRoutes({ projectDir })); app.use('/api/global-notifications', createGlobalNotificationsRoutes()); app.get('/api/prs', async (_req: Request, res: Response): Promise => { @@ -191,6 +193,7 @@ function createProjectRouter() { router.use(createProjectActionRoutes({ projectSseClients })); router.use(createProjectJobRoutes()); router.use(createProjectRoadmapRoutes()); + router.use(createProjectFeedbackRoutes()); router.get('/prs', async (req: Request, res: Response): Promise => { try { diff --git a/packages/server/src/routes/feedback.routes.ts b/packages/server/src/routes/feedback.routes.ts new file mode 100644 index 00000000..6c7e5bbf --- /dev/null +++ b/packages/server/src/routes/feedback.routes.ts @@ -0,0 +1,307 @@ +/** + * Feedback routes: /api/feedback/* + */ + +import { Request, Response, Router } from 'express'; + +import { + IFeedbackPattern, + IPromptAugmentation, + ISessionOutcome, + ISessionOutcomeSummary, + JobType, + PromptAugmentationStatus, + SessionOutcomeStatus, + getRepositories, + getValidJobTypes, +} from '@night-watch/core'; + +const DAY_MS = 24 * 60 * 60 * 1000; +const WINDOW_DAYS = [7, 30] as const; +const VALID_AUGMENTATION_STATUSES: PromptAugmentationStatus[] = [ + 'active', + 'paused', + 'expired', + 'archived', +]; + +interface IFeedbackRoutesContext { + getProjectDir: (req: Request) => string; + pathPrefix: string; +} + +interface IFeedbackBreakdownSummary { + totalCount: number; + successCount: number; + failureCount: number; + timeoutCount: number; + rateLimitedCount: number; + skippedCount: number; + successRate: number | null; +} + +interface IFeedbackWindowSummary extends ISessionOutcomeSummary { + days: number; + fromFinishedAt: number; + toFinishedAt: number; + successRate: number | null; + byJobType: Record; + byProvider: Record; +} + +interface IFeedbackSummaryResponse { + projectPath: string; + windows: { + last7Days: 
IFeedbackWindowSummary; + last30Days: IFeedbackWindowSummary; + }; + activeAugmentations: IPromptAugmentation[]; +} + +interface ITopFailurePattern { + key: string; + jobType: JobType; + providerKey: string; + category: string | null; + signature: string | null; + sampleCount: number; + lastSeenAt: number; +} + +interface IFeedbackPatternsResponse { + projectPath: string; + patterns: IFeedbackPattern[]; + topFailurePatterns: ITopFailurePattern[]; +} + +interface IAugmentationPatchBody { + action?: 'enable' | 'disable' | 'expire'; + enabled?: boolean; + status?: PromptAugmentationStatus; +} + +function emptyBreakdown(): IFeedbackBreakdownSummary { + return { + totalCount: 0, + successCount: 0, + failureCount: 0, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: null, + }; +} + +function applyOutcome(summary: IFeedbackBreakdownSummary, outcome: SessionOutcomeStatus): void { + summary.totalCount += 1; + if (outcome === 'success') summary.successCount += 1; + if (outcome === 'failure') summary.failureCount += 1; + if (outcome === 'timeout') summary.timeoutCount += 1; + if (outcome === 'rate_limited') summary.rateLimitedCount += 1; + if (outcome === 'skipped') summary.skippedCount += 1; +} + +function finalizeBreakdown(summary: IFeedbackBreakdownSummary): IFeedbackBreakdownSummary { + return { + ...summary, + successRate: summary.totalCount > 0 ? 
summary.successCount / summary.totalCount : null, + }; +} + +function summarizeOutcomesBy( + outcomes: ISessionOutcome[], + getKey: (outcome: ISessionOutcome) => string, +): Record { + const grouped: Record = {}; + for (const outcome of outcomes) { + const key = getKey(outcome); + grouped[key] ??= emptyBreakdown(); + applyOutcome(grouped[key], outcome.outcome); + } + + return Object.fromEntries( + Object.entries(grouped).map(([key, summary]) => [key, finalizeBreakdown(summary)]), + ); +} + +function buildWindowSummary(projectPath: string, days: number): IFeedbackWindowSummary { + const repo = getRepositories().sessionOutcomes; + const toFinishedAt = Date.now(); + const fromFinishedAt = toFinishedAt - days * DAY_MS; + const base = repo.querySummary({ projectPath, fromFinishedAt, toFinishedAt }); + const outcomes = repo.queryOutcomes({ projectPath, fromFinishedAt, toFinishedAt, limit: 500 }); + + const byJobType = Object.fromEntries( + getValidJobTypes() + .map((jobType) => { + const summary = repo.querySummary({ projectPath, jobType, fromFinishedAt, toFinishedAt }); + return [ + jobType, + finalizeBreakdown({ + totalCount: summary.totalCount, + successCount: summary.successCount, + failureCount: summary.failureCount, + timeoutCount: summary.timeoutCount, + rateLimitedCount: summary.rateLimitedCount, + skippedCount: summary.skippedCount, + successRate: null, + }), + ] as const; + }) + .filter(([, summary]) => summary.totalCount > 0), + ); + + return { + ...base, + days, + fromFinishedAt, + toFinishedAt, + successRate: base.totalCount > 0 ? 
base.successCount / base.totalCount : null, + byJobType, + byProvider: summarizeOutcomesBy(outcomes, (outcome) => outcome.providerKey), + }; +} + +function getActiveAugmentations(projectPath: string): IPromptAugmentation[] { + return getRepositories().sessionOutcomes.listAugmentations({ + projectPath, + status: 'active', + includeExpired: false, + limit: 250, + }); +} + +function buildFailurePatterns(projectPath: string): ITopFailurePattern[] { + const outcomes = getRepositories().sessionOutcomes.queryOutcomes({ + projectPath, + outcome: 'failure', + limit: 500, + }); + const patterns = new Map(); + + for (const outcome of outcomes) { + const key = [ + outcome.jobType, + outcome.providerKey, + outcome.failureCategory ?? 'uncategorized', + outcome.failureSignature ?? 'unknown', + ].join(':'); + const current = patterns.get(key); + if (current) { + current.sampleCount += 1; + current.lastSeenAt = Math.max(current.lastSeenAt, outcome.finishedAt); + continue; + } + + patterns.set(key, { + key, + jobType: outcome.jobType, + providerKey: outcome.providerKey, + category: outcome.failureCategory, + signature: outcome.failureSignature, + sampleCount: 1, + lastSeenAt: outcome.finishedAt, + }); + } + + return [...patterns.values()] + .sort((a, b) => b.sampleCount - a.sampleCount || b.lastSeenAt - a.lastSeenAt) + .slice(0, 10); +} + +function resolveAugmentationStatus(body: IAugmentationPatchBody): PromptAugmentationStatus | null { + if (body.action === 'enable') return 'active'; + if (body.action === 'disable') return 'paused'; + if (body.action === 'expire') return 'expired'; + if (body.enabled === true) return 'active'; + if (body.enabled === false) return 'paused'; + if (body.status && VALID_AUGMENTATION_STATUSES.includes(body.status)) return body.status; + return null; +} + +function createFeedbackRouteHandlers(ctx: IFeedbackRoutesContext): Router { + const router = Router({ mergeParams: true }); + const p = ctx.pathPrefix; + + router.get(`/${p}summary`, (req: Request, 
res: Response): void => { + try { + const projectPath = ctx.getProjectDir(req); + const response: IFeedbackSummaryResponse = { + projectPath, + windows: { + last7Days: buildWindowSummary(projectPath, WINDOW_DAYS[0]), + last30Days: buildWindowSummary(projectPath, WINDOW_DAYS[1]), + }, + activeAugmentations: getActiveAugmentations(projectPath), + }; + res.json(response); + } catch (error) { + res.status(500).json({ error: error instanceof Error ? error.message : String(error) }); + } + }); + + router.get(`/${p}patterns`, (req: Request, res: Response): void => { + try { + const projectPath = ctx.getProjectDir(req); + const response: IFeedbackPatternsResponse = { + projectPath, + patterns: getRepositories().sessionOutcomes.listPatterns({ projectPath, limit: 25 }), + topFailurePatterns: buildFailurePatterns(projectPath), + }; + res.json(response); + } catch (error) { + res.status(500).json({ error: error instanceof Error ? error.message : String(error) }); + } + }); + + router.patch(`/${p}augmentations/:id`, (req: Request, res: Response): void => { + try { + const id = parseInt(req.params.id as string, 10); + if (!Number.isInteger(id) || id <= 0) { + res.status(400).json({ error: 'Invalid augmentation id' }); + return; + } + + const status = resolveAugmentationStatus(req.body as IAugmentationPatchBody); + if (!status) { + res.status(400).json({ error: 'Expected action, enabled, or status update' }); + return; + } + + const projectPath = ctx.getProjectDir(req); + const augmentation = getRepositories().sessionOutcomes.updateAugmentationStatus( + id, + status, + projectPath, + ); + if (!augmentation) { + res.status(404).json({ error: 'Augmentation not found' }); + return; + } + + res.json({ augmentation }); + } catch (error) { + res.status(500).json({ error: error instanceof Error ? 
error.message : String(error) }); + } + }); + + return router; +} + +export interface IFeedbackRoutesDeps { + projectDir: string; +} + +export function createFeedbackRoutes(deps: IFeedbackRoutesDeps): Router { + return createFeedbackRouteHandlers({ + getProjectDir: () => deps.projectDir, + pathPrefix: '', + }); +} + +export function createProjectFeedbackRoutes(): Router { + return createFeedbackRouteHandlers({ + getProjectDir: (req) => req.projectDir!, + pathPrefix: 'feedback/', + }); +} diff --git a/qa-artifacts/qa-feedback-dashboard.png b/qa-artifacts/qa-feedback-dashboard.png new file mode 100644 index 00000000..5acecce9 Binary files /dev/null and b/qa-artifacts/qa-feedback-dashboard.png differ diff --git a/qa-artifacts/qa-feedback-dashboard.webm b/qa-artifacts/qa-feedback-dashboard.webm new file mode 100644 index 00000000..f599c9bf Binary files /dev/null and b/qa-artifacts/qa-feedback-dashboard.webm differ diff --git a/scripts/night-watch-cron.sh b/scripts/night-watch-cron.sh index de4bd5a7..23964ff0 100755 --- a/scripts/night-watch-cron.sh +++ b/scripts/night-watch-cron.sh @@ -725,6 +725,11 @@ Follow all CLAUDE.md conventions (if present). 
- Do NOT process any other PRDs — only ${ELIGIBLE_PRD}" fi +if [ -n "${NW_PROJECT_FEEDBACK_PROMPT:-}" ]; then + PROMPT="${PROMPT}"$'\n\n'"${NW_PROJECT_FEEDBACK_PROMPT}" + log "INFO: Added project feedback prompt context" +fi + # Dry-run mode: print diagnostics and exit if [ "${NW_DRY_RUN:-0}" = "1" ]; then log "DRY-RUN: Would process ${ELIGIBLE_PRD}" diff --git a/scripts/night-watch-pr-reviewer-cron.sh b/scripts/night-watch-pr-reviewer-cron.sh index 357ba31c..513d3ceb 100755 --- a/scripts/night-watch-pr-reviewer-cron.sh +++ b/scripts/night-watch-pr-reviewer-cron.sh @@ -1233,6 +1233,10 @@ for ATTEMPT in $(seq 1 "${TOTAL_ATTEMPTS}"); do LOG_LINE_BEFORE=$(wc -l < "${LOG_FILE}" 2>/dev/null || echo 0) REVIEWER_ATTEMPT_START=$(date +%s) REVIEWER_PROMPT="${REVIEWER_PROMPT_BASE}${TARGET_SCOPE_PROMPT}${PRD_CONTEXT_PROMPT}" + if [ -n "${NW_PROJECT_FEEDBACK_PROMPT:-}" ]; then + REVIEWER_PROMPT="${REVIEWER_PROMPT}"$'\n\n'"${NW_PROJECT_FEEDBACK_PROMPT}" + log "INFO: Added project feedback prompt context" + fi # Build provider command array using generic helper mapfile -d '' -t PROVIDER_CMD_PARTS < <(build_provider_cmd "${REVIEW_WORKTREE_DIR}" "${REVIEWER_PROMPT}") diff --git a/web/api.ts b/web/api.ts index 6767437e..971396ba 100644 --- a/web/api.ts +++ b/web/api.ts @@ -10,6 +10,7 @@ import type { IAnalyticsConfig, IAuditConfig, IBoardProviderConfig, + IFeedbackConfig, IJobProviders, ILogInfo, IMergerConfig, @@ -43,7 +44,7 @@ import { getWebJobDef } from './utils/jobs'; // Re-export shared types so consumers can import from either place export type { ClaudeModel, DayOfWeek, IAnalyticsConfig, IAuditConfig, IBoardProviderConfig, IJobProviders, ILogInfo, IMergerConfig, INightWatchConfig, - INotificationConfig, IPrdInfo, IProviderBucketConfig, IProviderPreset, IProviderScheduleOverride, IPrInfo, IProcessInfo, IQaConfig, + INotificationConfig, IFeedbackConfig, IPrdInfo, IProviderBucketConfig, IProviderPreset, IProviderScheduleOverride, IPrInfo, IProcessInfo, IQaConfig, 
IPrResolverConfig, IQueueConfig, IRoadmapItem, IRoadmapScannerConfig, IRoadmapStatus, IStatusSnapshot, IWebhookConfig, IWebhookTriggerConfig, IWebhookTriggerGithubConfig, IWebhookTriggerGithubRule, JobType, MergeMethod, QaArtifacts, QueueMode @@ -259,12 +260,120 @@ export interface ActionResult { error?: string; } +// ==================== Feedback Dashboard ==================== + +export type PromptAugmentationStatus = 'active' | 'paused' | 'expired' | 'archived'; + +export interface IFeedbackBreakdownSummary { + totalCount: number; + successCount: number; + failureCount: number; + timeoutCount: number; + rateLimitedCount: number; + skippedCount: number; + successRate: number | null; +} + +export interface IFeedbackWindowSummary extends IFeedbackBreakdownSummary { + days: number; + fromFinishedAt: number; + toFinishedAt: number; + averageDurationSeconds: number | null; + byOutcome: Record; + byFailureCategory: Record; + byJobType: Record; + byProvider: Record; +} + +export interface IPromptAugmentation { + id: number; + projectPath: string; + patternId: number | null; + jobType: JobType; + promptText: string; + status: PromptAugmentationStatus; + createdAt: number; + updatedAt: number; + expiresAt: number | null; + appliedCount: number; + successCount: number; +} + +export interface IFeedbackSummary { + projectPath: string; + windows: { + last7Days: IFeedbackWindowSummary; + last30Days: IFeedbackWindowSummary; + }; + activeAugmentations: IPromptAugmentation[]; +} + +export interface IFeedbackPattern { + id: number; + projectPath: string; + patternKey: string; + jobType: JobType; + category: string; + title: string; + description: string; + sampleCount: number; + confidence: number; + firstSeenAt: number; + lastSeenAt: number; + status: 'observing' | 'active' | 'dismissed' | 'resolved'; + metadata: Record; +} + +export interface ITopFailurePattern { + key: string; + jobType: JobType; + providerKey: string; + category: string | null; + signature: string | null; + 
sampleCount: number; + lastSeenAt: number; +} + +export interface IFeedbackPatterns { + projectPath: string; + patterns: IFeedbackPattern[]; + topFailurePatterns: ITopFailurePattern[]; +} + +export interface IAugmentationUpdate { + action?: 'enable' | 'disable' | 'expire'; + enabled?: boolean; + status?: PromptAugmentationStatus; +} + +export interface IAugmentationUpdateResult { + augmentation: IPromptAugmentation; +} + // ==================== API Functions ==================== export function fetchStatus(): Promise { return apiFetch(apiPath('/api/status')); } +export function fetchFeedbackSummary(): Promise { + return apiFetch(apiPath('/api/feedback/summary')); +} + +export function fetchFeedbackPatterns(): Promise { + return apiFetch(apiPath('/api/feedback/patterns')); +} + +export function updateFeedbackAugmentation( + id: number, + update: IAugmentationUpdate, +): Promise { + return apiFetch(apiPath(`/api/feedback/augmentations/${id}`), { + method: 'PATCH', + body: JSON.stringify(update), + }); +} + export function fetchPrs(): Promise { return apiFetch(apiPath('/api/prs')); } diff --git a/web/components/feedback/PatternList.tsx b/web/components/feedback/PatternList.tsx new file mode 100644 index 00000000..e1ee9105 --- /dev/null +++ b/web/components/feedback/PatternList.tsx @@ -0,0 +1,197 @@ +import React from 'react'; +import { Clock3, PauseCircle, TimerOff } from 'lucide-react'; +import type { + IAugmentationUpdate, + IFeedbackPattern, + IPromptAugmentation, + ITopFailurePattern, +} from '../../api.js'; +import Badge from '../ui/Badge.js'; +import Button from '../ui/Button.js'; + +interface IPatternListProps { + activePatterns: IFeedbackPattern[]; + augmentations: IPromptAugmentation[]; + topFailurePatterns: ITopFailurePattern[]; + updatingAugmentationId?: number | null; + onAugmentationAction: (id: number, action: NonNullable) => Promise | void; +} + +function formatPercent(value: number): string { + return `${Math.round(value * 100)}%`; +} + +function 
formatDate(value: number): string { + return new Date(value).toLocaleDateString([], { month: 'short', day: 'numeric' }); +} + +function getCategoryLabel(category: string | null): string { + return category?.replace(/_/g, ' ') || 'uncategorized'; +} + +const PatternList: React.FC = ({ + activePatterns, + augmentations, + topFailurePatterns, + updatingAugmentationId = null, + onAugmentationAction, +}) => { + return ( +
+
+
+
+

Active Patterns

+ {activePatterns.length} +
+ {activePatterns.length === 0 ? ( +

+ No active feedback patterns. +

+ ) : ( +
+ {activePatterns.map((pattern) => ( +
+
+
+
+ {pattern.title} +
+

{pattern.description}

+
+ + {pattern.jobType} + +
+
+ {getCategoryLabel(pattern.category)} + {pattern.sampleCount} samples + {formatPercent(pattern.confidence)} confidence +
+
+ ))} +
+ )} +
+ +
+
+

Top Failure Patterns

+ {topFailurePatterns.length} +
+ {topFailurePatterns.length === 0 ? ( +

+ No repeated failure signatures yet. +

+ ) : ( +
+ {topFailurePatterns.map((pattern) => ( +
+
+
+
+ {pattern.signature || getCategoryLabel(pattern.category)} +
+
+ {pattern.jobType} + {pattern.providerKey} +
+
+ + {pattern.sampleCount} + +
+
+ + Last seen {formatDate(pattern.lastSeenAt)} +
+
+ ))} +
+ )} +
+
+ +
+
+

Active Augmentations

+ 0 ? 'success' : 'neutral'}>{augmentations.length} +
+ {augmentations.length === 0 ? ( +

+ No active prompt augmentations. +

+ ) : ( +
+ + + + + + + + + + + + {augmentations.map((augmentation) => { + const isUpdating = updatingAugmentationId === augmentation.id; + const successRate = + augmentation.appliedCount > 0 + ? `${Math.round((augmentation.successCount / augmentation.appliedCount) * 100)}%` + : 'new'; + + return ( + + + + + + + + ); + })} + +
Prompt SnippetJobUseExpiresActions
+
+ {augmentation.promptText} +
+
+ + {augmentation.jobType} + + + {augmentation.appliedCount} applied · {successRate} + + {augmentation.expiresAt ? formatDate(augmentation.expiresAt) : 'No expiry'} + +
+ + +
+
+
+ )} +
+
+ ); +}; + +export default PatternList; diff --git a/web/components/feedback/PerformanceDashboard.tsx b/web/components/feedback/PerformanceDashboard.tsx new file mode 100644 index 00000000..83d58652 --- /dev/null +++ b/web/components/feedback/PerformanceDashboard.tsx @@ -0,0 +1,307 @@ +import React from 'react'; +import { AlertCircle, RefreshCw, TrendingDown, TrendingUp } from 'lucide-react'; +import { + fetchFeedbackPatterns, + fetchFeedbackSummary, + IAugmentationUpdate, + IFeedbackPatterns, + IFeedbackSummary, + updateFeedbackAugmentation, + useApi, +} from '../../api.js'; +import { useStore } from '../../store/useStore.js'; +import Badge from '../ui/Badge.js'; +import Button from '../ui/Button.js'; +import Card from '../ui/Card.js'; +import PatternList from './PatternList.js'; + +function formatPercent(value: number | null): string { + return value === null ? '—' : `${Math.round(value * 100)}%`; +} + +function formatDuration(seconds: number | null): string { + if (seconds === null) return '—'; + if (seconds < 60) return `${Math.round(seconds)}s`; + return `${Math.floor(seconds / 60)}m ${Math.round(seconds % 60)}s`; +} + +function getSortedEntries(values: Record, limit: number): Array<[string, number]> { + return Object.entries(values) + .sort(([, a], [, b]) => b - a) + .slice(0, limit); +} + +function getBreakdownEntries(values: IFeedbackSummary['windows']['last30Days']['byJobType']): Array<[string, string]> { + return Object.entries(values) + .sort(([, a], [, b]) => b.totalCount - a.totalCount) + .slice(0, 5) + .map(([key, summary]) => [key, `${formatPercent(summary.successRate)} · ${summary.totalCount} runs`]); +} + +function getTrendLabel(last7Rate: number | null, last30Rate: number | null): string { + if (last7Rate === null || last30Rate === null) return 'Waiting for comparable data'; + const delta = Math.round((last7Rate - last30Rate) * 100); + if (delta === 0) return 'Flat vs 30 days'; + return `${delta > 0 ? 
'+' : ''}${delta} pts vs 30 days`; +} + +function getTrendVariant(last7Rate: number | null, last30Rate: number | null): 'success' | 'warning' | 'neutral' { + if (last7Rate === null || last30Rate === null) return 'neutral'; + if (last7Rate >= last30Rate) return 'success'; + return 'warning'; +} + +interface IMetricProps { + label: string; + value: string; + detail: string; +} + +const Metric: React.FC = ({ label, value, detail }) => ( +
+
{label}
+
{value}
+
{detail}
+
+); + +const PerformanceDashboard: React.FC = () => { + const { addToast, selectedProjectId, globalModeLoading } = useStore(); + const [updatingAugmentationId, setUpdatingAugmentationId] = React.useState(null); + + const { + data: summary, + loading: summaryLoading, + error: summaryError, + refetch: refetchSummary, + } = useApi(fetchFeedbackSummary, [selectedProjectId], { enabled: !globalModeLoading }); + + const { + data: patterns, + loading: patternsLoading, + error: patternsError, + refetch: refetchPatterns, + } = useApi(fetchFeedbackPatterns, [selectedProjectId], { enabled: !globalModeLoading }); + + const handleRefresh = () => { + refetchSummary(); + refetchPatterns(); + }; + + const handleAugmentationAction = async (id: number, action: NonNullable) => { + setUpdatingAugmentationId(id); + try { + await updateFeedbackAugmentation(id, { action }); + addToast({ + title: action === 'expire' ? 'Augmentation Expired' : 'Augmentation Disabled', + message: 'Prompt augmentation state was updated.', + type: 'success', + }); + handleRefresh(); + } catch (err) { + addToast({ + title: 'Update Failed', + message: err instanceof Error ? err.message : 'Failed to update augmentation', + type: 'error', + }); + } finally { + setUpdatingAugmentationId(null); + } + }; + + const loading = summaryLoading || patternsLoading; + const error = summaryError || patternsError; + const last7 = summary?.windows.last7Days ?? null; + const last30 = summary?.windows.last30Days ?? null; + const activePatterns = (patterns?.patterns ?? []) + .filter((pattern) => pattern.status === 'active') + .sort((a, b) => b.confidence - a.confidence || b.sampleCount - a.sampleCount) + .slice(0, 5); + const topFailurePatterns = patterns?.topFailurePatterns.slice(0, 5) ?? []; + const categoryEntries = getSortedEntries(last30?.byFailureCategory ?? {}, 5); + const maxCategoryCount = Math.max(...categoryEntries.map(([, count]) => count), 1); + const hasRecordedOutcomes = (last30?.totalCount ?? 
0) > 0 || (last7?.totalCount ?? 0) > 0; + const trendVariant = getTrendVariant(last7?.successRate ?? null, last30?.successRate ?? null); + const trendIcon = + trendVariant === 'success' ? ( + + ) : trendVariant === 'warning' ? ( + + ) : null; + + return ( +
+
+
+

+ Feedback Performance +

+

Outcome trends, repeated failures, and prompt augmentations.

+
+ +
+ + + {loading && !summary ? ( +
Loading feedback performance...
+ ) : error ? ( +
+ + {error.message} +
+ ) : !summary || !hasRecordedOutcomes ? ( +
+
No feedback outcomes recorded yet.
+

+ This panel will populate after executor, reviewer, QA, audit, planner, or merge jobs complete. +

+
+ ) : ( +
+
+ + + + +
+ +
+
+
+

Success-Rate Trend

+ + {trendIcon} + {getTrendLabel(last7?.successRate ?? null, last30?.successRate ?? null)} + +
+
+ {[ + { label: 'Last 7 days', value: last7?.successRate ?? 0 }, + { label: 'Last 30 days', value: last30?.successRate ?? 0 }, + ].map((row) => ( +
+
+ {row.label} + {formatPercent(row.value)} +
+
+
+
+
+ ))} +
+
+ +
+
+

Failure Categories

+ {categoryEntries.length} +
+ {categoryEntries.length === 0 ? ( +

+ No categorized failures in the last 30 days. +

+ ) : ( +
+ {categoryEntries.map(([category, count]) => ( +
+
+ {category.replace(/_/g, ' ')} + {count} +
+
+
+
+
+ ))} +
+ )} +
+
+ +
+
+
+

Job Breakdown

+ {Object.keys(last30?.byJobType ?? {}).length} +
+ {getBreakdownEntries(last30?.byJobType ?? {}).length === 0 ? ( +

+ No job-specific outcomes in the last 30 days. +

+ ) : ( +
+ {getBreakdownEntries(last30?.byJobType ?? {}).map(([jobType, detail]) => ( +
+ {jobType} + {detail} +
+ ))} +
+ )} +
+ +
+
+

Provider Breakdown

+ {Object.keys(last30?.byProvider ?? {}).length} +
+ {getBreakdownEntries(last30?.byProvider ?? {}).length === 0 ? ( +

+ No provider-specific outcomes in the last 30 days. +

+ ) : ( +
+ {getBreakdownEntries(last30?.byProvider ?? {}).map(([provider, detail]) => ( +
+ {provider} + {detail} +
+ ))} +
+ )} +
+
+
+ )} + + + + + +
+ ); +}; + +export default PerformanceDashboard; diff --git a/web/components/feedback/__tests__/PatternList.test.tsx b/web/components/feedback/__tests__/PatternList.test.tsx new file mode 100644 index 00000000..33a3fe8e --- /dev/null +++ b/web/components/feedback/__tests__/PatternList.test.tsx @@ -0,0 +1,39 @@ +import { render, screen } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; +import { describe, expect, it, vi } from 'vitest'; +import PatternList from '../PatternList.js'; + +const now = Date.now(); + +describe('PatternList', () => { + it('should disable augmentation', async () => { + const onAugmentationAction = vi.fn(); + + render( + , + ); + + await userEvent.click(screen.getByRole('button', { name: /disable/i })); + + expect(onAugmentationAction).toHaveBeenCalledWith(3, 'disable'); + }); +}); diff --git a/web/components/feedback/__tests__/PerformanceDashboard.test.tsx b/web/components/feedback/__tests__/PerformanceDashboard.test.tsx new file mode 100644 index 00000000..787dfde6 --- /dev/null +++ b/web/components/feedback/__tests__/PerformanceDashboard.test.tsx @@ -0,0 +1,196 @@ +import { render, screen, waitFor } from '@testing-library/react'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import PerformanceDashboard from '../PerformanceDashboard.js'; +import { useStore } from '../../../store/useStore.js'; + +const now = Date.now(); + +const summary = { + projectPath: '/tmp/night-watch', + windows: { + last7Days: { + days: 7, + fromFinishedAt: now - 7 * 24 * 60 * 60 * 1000, + toFinishedAt: now, + totalCount: 4, + successCount: 3, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.75, + averageDurationSeconds: 90, + byOutcome: { success: 3, failure: 1 }, + byFailureCategory: { tests: 1 }, + byJobType: { + executor: { + totalCount: 4, + successCount: 3, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.75, + }, 
+ }, + byProvider: { + codex: { + totalCount: 4, + successCount: 3, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.75, + }, + }, + }, + last30Days: { + days: 30, + fromFinishedAt: now - 30 * 24 * 60 * 60 * 1000, + toFinishedAt: now, + totalCount: 10, + successCount: 6, + failureCount: 3, + timeoutCount: 1, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.6, + averageDurationSeconds: 125, + byOutcome: { success: 6, failure: 3, timeout: 1 }, + byFailureCategory: { tests: 2, lint: 1 }, + byJobType: { + executor: { + totalCount: 6, + successCount: 4, + failureCount: 2, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.67, + }, + reviewer: { + totalCount: 4, + successCount: 2, + failureCount: 1, + timeoutCount: 1, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.5, + }, + }, + byProvider: { + codex: { + totalCount: 7, + successCount: 5, + failureCount: 2, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.71, + }, + claude: { + totalCount: 3, + successCount: 1, + failureCount: 1, + timeoutCount: 1, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.33, + }, + }, + }, + }, + activeAugmentations: [ + { + id: 7, + projectPath: '/tmp/night-watch', + patternId: 1, + jobType: 'executor', + promptText: 'Check flaky test setup before editing.', + status: 'active', + createdAt: now, + updatedAt: now, + expiresAt: null, + appliedCount: 2, + successCount: 1, + }, + ], +}; + +const patterns = { + projectPath: '/tmp/night-watch', + patterns: [ + { + id: 1, + projectPath: '/tmp/night-watch', + patternKey: 'executor:tests', + jobType: 'executor', + category: 'tests', + title: 'Repeated test failures', + description: 'Executor runs repeatedly fail in the test suite.', + sampleCount: 3, + confidence: 0.82, + firstSeenAt: now - 1000, + lastSeenAt: now, + status: 'active', + metadata: {}, + }, + ], + topFailurePatterns: [ + { + key: 
'executor:codex:tests:vitest failed', + jobType: 'executor', + providerKey: 'codex', + category: 'tests', + signature: 'vitest failed', + sampleCount: 2, + lastSeenAt: now, + }, + ], +}; + +describe('PerformanceDashboard', () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it('should render feedback summary', async () => { + useStore.setState({ globalModeLoading: false, selectedProjectId: null }); + vi.stubGlobal( + 'fetch', + vi.fn((input: RequestInfo | URL) => { + const url = String(input); + if (url.endsWith('/api/feedback/summary')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(summary), + } as Response); + } + if (url.endsWith('/api/feedback/patterns')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(patterns), + } as Response); + } + return Promise.reject(new Error(`Unhandled URL: ${url}`)); + }), + ); + + render(); + + await waitFor(() => { + expect(screen.getAllByText('75%').length).toBeGreaterThan(0); + }); + + expect(screen.getByText('Feedback Performance')).toBeInTheDocument(); + expect(screen.getByText('Success-Rate Trend')).toBeInTheDocument(); + expect(screen.getByText('Failure Categories')).toBeInTheDocument(); + expect(screen.getByText('Job Breakdown')).toBeInTheDocument(); + expect(screen.getByText('Provider Breakdown')).toBeInTheDocument(); + expect(screen.getByText('reviewer')).toBeInTheDocument(); + expect(screen.getAllByText('codex').length).toBeGreaterThan(0); + expect(screen.getByText('Repeated test failures')).toBeInTheDocument(); + expect(screen.getByText('Check flaky test setup before editing.')).toBeInTheDocument(); + }); +}); diff --git a/web/pages/Dashboard.tsx b/web/pages/Dashboard.tsx index 6a4462b1..b729fb86 100644 --- a/web/pages/Dashboard.tsx +++ b/web/pages/Dashboard.tsx @@ -2,19 +2,19 @@ import React, { useState } from 'react'; import { useNavigate } from 'react-router-dom'; import { Activity, - CheckCircle, - Clock, ArrowRight, Calendar, + CheckCircle, + Clock, Play, 
Pause, RefreshCw, } from 'lucide-react'; import Card from '../components/ui/Card'; -import Button from '../components/ui/Button'; import { useApi, fetchScheduleInfo, fetchBoardStatus, triggerCancel, triggerClearLock, triggerJob, triggerInstallCron, triggerUninstallCron, BOARD_COLUMNS, IBoardStatus, BoardColumnName } from '../api'; import { useStore } from '../store/useStore'; import AgentStatusBar from '../components/dashboard/AgentStatusBar'; +import PerformanceDashboard from '../components/feedback/PerformanceDashboard.js'; const BOARD_COLUMN_COLORS: Record = { 'Draft': 'text-slate-400 bg-slate-500/10 ring-slate-500/20', @@ -72,13 +72,6 @@ const Dashboard: React.FC = () => { const boardReadyCount = boardStatus?.columns['Ready']?.length ?? 0; const boardInProgressCount = boardStatus?.columns['In Progress']?.length ?? 0; - const executorProcess = currentStatus.processes.find(p => p.name === 'executor'); - const reviewerProcess = currentStatus.processes.find(p => p.name === 'reviewer'); - const qaProcess = currentStatus.processes.find(p => p.name === 'qa'); - const auditProcess = currentStatus.processes.find(p => p.name === 'audit'); - const plannerProcess = currentStatus.processes.find(p => p.name === 'planner'); - const analyticsProcess = currentStatus.processes.find(p => p.name === 'analytics'); - const handleCancelProcess = async (type: 'run' | 'review') => { setCancellingProcess(type); try { @@ -343,6 +336,8 @@ const Dashboard: React.FC = () => { ) : null} + + {/* Board Widget */}
diff --git a/web/pages/Scheduling.tsx b/web/pages/Scheduling.tsx index 8dabd928..c8459d1d 100644 --- a/web/pages/Scheduling.tsx +++ b/web/pages/Scheduling.tsx @@ -32,6 +32,7 @@ import { useStore } from '../store/useStore'; import type { IAnalyticsConfig, IAuditConfig, + IFeedbackConfig, IJobProviders, IMergerConfig, INightWatchConfig, @@ -97,6 +98,7 @@ type AutomationForm = { qa: IQaConfig; audit: IAuditConfig; analytics: IAnalyticsConfig; + feedback: IFeedbackConfig; prResolver: IPrResolverConfig; merger: IMergerConfig; roadmapScanner: IRoadmapScannerConfig; @@ -155,6 +157,13 @@ const toAutomationForm = (config: INightWatchConfig): AutomationForm => ({ qa: config.qa || getDefaultQaConfig(), audit: config.audit || getDefaultAuditConfig(), analytics: config.analytics || getDefaultAnalyticsConfig(), + feedback: config.feedback ?? { + enabled: true, + confidenceThreshold: 0.75, + augmentationTtlDays: 14, + maxActiveAugmentations: 3, + successStreakToExpire: 3, + }, prResolver: config.prResolver ?? getDefaultPrResolverConfig(), merger: config.merger ?? 
getDefaultMergerConfig(), roadmapScanner: config.roadmapScanner || getDefaultRoadmapScannerConfig(), @@ -369,6 +378,7 @@ const Scheduling: React.FC = () => { qa: form.qa, audit: form.audit, analytics: form.analytics, + feedback: form.feedback, prResolver: form.prResolver, merger: form.merger, roadmapScanner: form.roadmapScanner, diff --git a/web/pages/Settings.tsx b/web/pages/Settings.tsx index 63102d9b..02d9cf82 100644 --- a/web/pages/Settings.tsx +++ b/web/pages/Settings.tsx @@ -9,6 +9,7 @@ import { IAnalyticsConfig, IAuditConfig, IBoardProviderConfig, + IFeedbackConfig, IJobProviders, IMergerConfig, INightWatchConfig, @@ -124,6 +125,7 @@ type ConfigForm = { qa: IQaConfig; audit: IAuditConfig; analytics: IAnalyticsConfig; + feedback: IFeedbackConfig; prResolver: IPrResolverConfig; merger: IMergerConfig; queue: INightWatchConfig['queue']; @@ -183,6 +185,13 @@ const toFormState = (config: INightWatchConfig): ConfigForm => { qa: config.qa || getDefaultQaConfig(), audit: config.audit || getDefaultAuditConfig(), analytics: config.analytics || getDefaultAnalyticsConfig(), + feedback: config.feedback ?? { + enabled: true, + confidenceThreshold: 0.75, + augmentationTtlDays: 14, + maxActiveAugmentations: 3, + successStreakToExpire: 3, + }, prResolver: config.prResolver ?? getDefaultPrResolverConfig(), merger: config.merger ?? 
getDefaultMergerConfig(), queue: config.queue || { @@ -400,6 +409,7 @@ const Settings: React.FC = () => { qa: form.qa, audit: form.audit, analytics: form.analytics, + feedback: form.feedback, prResolver: form.prResolver, merger: form.merger, queue: form.queue, diff --git a/web/pages/__tests__/Scheduling.test.tsx b/web/pages/__tests__/Scheduling.test.tsx index 79e7ccc5..4f05edfa 100644 --- a/web/pages/__tests__/Scheduling.test.tsx +++ b/web/pages/__tests__/Scheduling.test.tsx @@ -113,6 +113,13 @@ function makeConfig(overrides: Partial = {}): INightWatchConf targetColumn: 'Draft', analysisPrompt: '', }, + feedback: { + enabled: true, + confidenceThreshold: 0.75, + augmentationTtlDays: 14, + maxActiveAugmentations: 3, + successStreakToExpire: 3, + }, merger: { enabled: true, schedule: '55 */4 * * *', @@ -144,6 +151,10 @@ function makeConfig(overrides: Partial = {}): INightWatchConf ...base.analytics, ...(overrides.analytics ?? {}), }, + feedback: { + ...base.feedback, + ...(overrides.feedback ?? 
{}), + }, }; } diff --git a/web/pages/__tests__/Settings.scheduling.test.tsx b/web/pages/__tests__/Settings.scheduling.test.tsx index b56627a0..865597df 100644 --- a/web/pages/__tests__/Settings.scheduling.test.tsx +++ b/web/pages/__tests__/Settings.scheduling.test.tsx @@ -96,6 +96,13 @@ function makeConfig(overrides: Partial = {}): INightWatchConf targetColumn: 'Draft', analysisPrompt: '', }, + feedback: { + enabled: true, + confidenceThreshold: 0.75, + augmentationTtlDays: 14, + maxActiveAugmentations: 3, + successStreakToExpire: 3, + }, prResolver: { enabled: true, schedule: '10 */4 * * *', diff --git a/web/pages/settings/JobsTab.tsx b/web/pages/settings/JobsTab.tsx index ca7062c9..7fd0245e 100644 --- a/web/pages/settings/JobsTab.tsx +++ b/web/pages/settings/JobsTab.tsx @@ -8,6 +8,7 @@ import { Layout, Play, Search, + Sparkles, } from 'lucide-react'; import { IAnalyticsConfig, @@ -17,6 +18,7 @@ import { IPrResolverConfig, IRoadmapScannerConfig, IJobProviders, + IFeedbackConfig, MergeMethod, QaArtifacts, INightWatchConfig, @@ -55,6 +57,7 @@ interface IConfigFormJobs { qa: IQaConfig; audit: IAuditConfig; analytics: IAnalyticsConfig; + feedback: IFeedbackConfig; prResolver: IPrResolverConfig; merger: IMergerConfig; roadmapScanner: IRoadmapScannerConfig; @@ -115,6 +118,89 @@ const JobsTab: React.FC = ({ return (
+
+
+
+

Prompt Augmentation

+

+ Tune how repeated feedback patterns become prompt snippets. +

+
+
+ +
+
+
+
+
+
+ Enable prompt augmentation +

Adds capped feedback snippets to future job prompts.

+
+ updateField('feedback', { ...form.feedback, enabled: checked })} + /> +
+ + updateField('feedback', { + ...form.feedback, + confidenceThreshold: Math.max(0, Math.min(1, Number(e.target.value || 0))), + }) + } + helperText="Minimum confidence required before a snippet can activate." + /> + + updateField('feedback', { + ...form.feedback, + augmentationTtlDays: Math.max(1, Number(e.target.value || 1)), + }) + } + rightIcon={days} + helperText="How long an augmentation remains active before expiry." + /> + + updateField('feedback', { + ...form.feedback, + maxActiveAugmentations: Math.max(0, Number(e.target.value || 0)), + }) + } + helperText="Maximum active augmentation snippets applied to each job prompt." + /> + + updateField('feedback', { + ...form.feedback, + successStreakToExpire: Math.max(0, Number(e.target.value || 0)), + }) + } + helperText="Consecutive successes before active snippets expire." + /> +
+
+
+
diff --git a/web/tests/e2e/qa/qa-feedback-dashboard.spec.ts b/web/tests/e2e/qa/qa-feedback-dashboard.spec.ts new file mode 100644 index 00000000..d1d0d058 --- /dev/null +++ b/web/tests/e2e/qa/qa-feedback-dashboard.spec.ts @@ -0,0 +1,282 @@ +import { expect, test } from '@playwright/test'; + +const now = Date.now(); + +const jobSchedule = { + schedule: '*/30 * * * *', + installed: true, + nextRun: null, + delayMinutes: 0, + manualDelayMinutes: 0, + balancedDelayMinutes: 0, +}; + +const config = { + cronSchedule: '*/30 * * * *', + reviewerSchedule: '*/45 * * * *', + executorEnabled: true, + reviewerEnabled: true, + qa: { enabled: true, schedule: '15 */2 * * *' }, + audit: { enabled: true, schedule: '30 */6 * * *' }, + analytics: { enabled: true, schedule: '0 6 * * 1' }, + roadmapScanner: { enabled: true, slicerSchedule: '0 */6 * * *' }, + prResolver: { enabled: true, schedule: '15 6,14,22 * * *' }, + merger: { enabled: true, schedule: '55 */4 * * *' }, +}; + +test.describe('Dashboard - Feedback Performance QA', () => { + test.beforeEach(async ({ page }) => { + await page.route('**/api/mode', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ globalMode: false }), + }); + }); + + await page.route('**/api/status', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ + projectName: 'Night Watch', + projectDir: '/tmp/night-watch', + config, + prds: [], + processes: [ + { name: 'executor', running: false, pid: null }, + { name: 'reviewer', running: false, pid: null }, + { name: 'qa', running: false, pid: null }, + { name: 'audit', running: false, pid: null }, + { name: 'planner', running: false, pid: null }, + { name: 'analytics', running: false, pid: null }, + { name: 'pr-resolver', running: false, pid: null }, + { name: 'merger', running: false, pid: null }, + ], + prs: [], + logs: [], + crontab: { installed: true, entries: ['night-watch executor'] }, + activePrd: null, 
+ timestamp: new Date(now).toISOString(), + }), + }); + }); + + await page.route('**/api/prs', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify([]), + }); + }); + + await page.route('**/api/schedule-info', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ + executor: jobSchedule, + reviewer: jobSchedule, + qa: jobSchedule, + audit: jobSchedule, + planner: jobSchedule, + analytics: jobSchedule, + prResolver: jobSchedule, + merger: jobSchedule, + paused: false, + schedulingPriority: 50, + entries: ['night-watch executor'], + }), + }); + }); + + await page.route('**/api/board/status', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ + enabled: true, + columns: { + Draft: [], + Ready: [], + 'In Progress': [], + Review: [], + Done: [], + }, + }), + }); + }); + + await page.route('**/api/feedback/summary', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ + projectPath: '/tmp/night-watch', + windows: { + last7Days: { + days: 7, + fromFinishedAt: now - 7 * 24 * 60 * 60 * 1000, + toFinishedAt: now, + totalCount: 5, + successCount: 4, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.8, + averageDurationSeconds: 72, + byOutcome: { success: 4, failure: 1 }, + byFailureCategory: { tests: 1 }, + byJobType: { + executor: { + totalCount: 5, + successCount: 4, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.8, + }, + }, + byProvider: { + codex: { + totalCount: 5, + successCount: 4, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.8, + }, + }, + }, + last30Days: { + days: 30, + fromFinishedAt: now - 30 * 24 * 60 * 60 * 1000, + toFinishedAt: now, + totalCount: 12, + successCount: 9, + failureCount: 2, + timeoutCount: 1, + 
rateLimitedCount: 1, + skippedCount: 0, + successRate: 0.75, + averageDurationSeconds: 95, + byOutcome: { success: 9, failure: 2, timeout: 1 }, + byFailureCategory: { tests: 2, lint: 1 }, + byJobType: { + executor: { + totalCount: 8, + successCount: 6, + failureCount: 2, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.75, + }, + reviewer: { + totalCount: 4, + successCount: 3, + failureCount: 0, + timeoutCount: 1, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.75, + }, + }, + byProvider: { + codex: { + totalCount: 9, + successCount: 7, + failureCount: 1, + timeoutCount: 1, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.78, + }, + claude: { + totalCount: 3, + successCount: 2, + failureCount: 1, + timeoutCount: 0, + rateLimitedCount: 0, + skippedCount: 0, + successRate: 0.67, + }, + }, + }, + }, + activeAugmentations: [ + { + id: 7, + projectPath: '/tmp/night-watch', + patternId: 1, + jobType: 'executor', + promptText: 'Check known flaky test setup before editing.', + status: 'active', + createdAt: now, + updatedAt: now, + expiresAt: null, + appliedCount: 2, + successCount: 1, + }, + ], + }), + }); + }); + + await page.route('**/api/feedback/patterns', async (route) => { + await route.fulfill({ + contentType: 'application/json', + body: JSON.stringify({ + projectPath: '/tmp/night-watch', + patterns: [ + { + id: 1, + projectPath: '/tmp/night-watch', + patternKey: 'executor:tests', + jobType: 'executor', + category: 'tests', + title: 'Repeated test failures', + description: 'Executor runs repeatedly fail in vitest.', + sampleCount: 3, + confidence: 0.86, + firstSeenAt: now - 10_000, + lastSeenAt: now, + status: 'active', + metadata: {}, + }, + ], + topFailurePatterns: [ + { + key: 'executor:codex:tests:vitest failed', + jobType: 'executor', + providerKey: 'codex', + category: 'tests', + signature: 'vitest failed', + sampleCount: 3, + lastSeenAt: now, + }, + ], + }), + }); + }); + }); + + test('should render feedback 
performance metrics and augmentation controls', async ({ page }) => { + await page.goto('#/'); + await page.waitForLoadState('networkidle'); + + await expect(page.getByRole('heading', { name: 'Feedback Performance' })).toBeVisible(); + await expect(page.getByText('80%').first()).toBeVisible(); + await expect(page.getByText('75%').first()).toBeVisible(); + await expect(page.getByText('Success-Rate Trend')).toBeVisible(); + await expect(page.getByText('Failure Categories')).toBeVisible(); + await expect(page.getByText('Job Breakdown')).toBeVisible(); + await expect(page.getByText('Provider Breakdown')).toBeVisible(); + await expect(page.getByText('Repeated test failures')).toBeVisible(); + await expect(page.getByText('Check known flaky test setup before editing.')).toBeVisible(); + await expect(page.getByRole('button', { name: /disable/i })).toBeVisible(); + await expect(page.getByRole('button', { name: /expire/i })).toBeVisible(); + + await page.screenshot({ + path: 'test-results/qa-feedback-dashboard.png', + fullPage: true, + }); + }); +}); diff --git a/web/vitest.config.ts b/web/vitest.config.ts index 9e243f1a..6621be37 100644 --- a/web/vitest.config.ts +++ b/web/vitest.config.ts @@ -14,7 +14,12 @@ export default defineConfig({ globals: true, environment: 'happy-dom', setupFiles: ['./src/__tests__/setup.ts'], - include: ['src/**/__tests__/**/*.test.tsx', 'pages/**/__tests__/**/*.test.tsx', 'hooks/**/__tests__/**/*.test.ts'], + include: [ + 'components/**/__tests__/**/*.test.tsx', + 'src/**/__tests__/**/*.test.tsx', + 'pages/**/__tests__/**/*.test.tsx', + 'hooks/**/__tests__/**/*.test.ts', + ], coverage: { provider: 'v8', reporter: ['text', 'json', 'html'],