diff --git a/src/cli.tsx b/src/cli.tsx index 21d4797..13606a8 100644 --- a/src/cli.tsx +++ b/src/cli.tsx @@ -18,7 +18,7 @@ import { quickSetup, loadServiceConfig, ONBOARDING_PROMPTS } from './config/onbo import { SetupWizard } from './cli/setup-wizard'; import { ChatInterface } from './cli/chat'; import { MarkdownText } from './cli/components/markdown'; -import { createRetriever } from './knowledge/retriever'; +import { createConfiguredRetriever } from './knowledge/retriever'; import type { AgentEvent } from './agent/types'; import { skillRegistry } from './skills/registry'; import { getRuntimeTools } from './cli/runtime-tools'; @@ -56,8 +56,8 @@ const VERSION = '0.1.0'; /** * Knowledge retriever adapter for Agent runtime. */ -function createAgentKnowledgeRetriever() { - const retriever = createRetriever(); +async function createAgentKnowledgeRetriever(config: Awaited>) { + const retriever = await createConfiguredRetriever('.runbook', config); return { retrieve: async (context: { @@ -99,7 +99,7 @@ async function createRuntimeAgent(config: Awaited> llm, tools: runtimeTools, skills: runtimeSkills, - knowledgeRetriever: createAgentKnowledgeRetriever(), + knowledgeRetriever: await createAgentKnowledgeRetriever(config), config: { maxIterations: config.agent.maxIterations, maxHypothesisDepth: config.agent.maxHypothesisDepth, @@ -667,7 +667,7 @@ async function runStructuredInvestigation( availableTools: runtimeTools.map((tool) => tool.name), availableSkills: runtimeSkills, fetchRelevantRunbooks: async (context: RemediationContext) => { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever('.runbook', config); try { const searchQuery = [context.rootCause, ...context.affectedServices].join(' ').trim(); const results = await retriever.search( @@ -972,7 +972,7 @@ async function runStructuredInvestigation( } if (applyRunbookUpdates && learning.appliedRunbookUpdates.length > 0) { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever('.runbook', config); try { await retriever.sync(); console.log(chalk.gray('Knowledge index refreshed after runbook updates.')); @@ -1255,7 +1255,7 @@ knowledge .action(async () => { console.log(chalk.blue('Syncing knowledge from configured sources...')); try { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever(); const { added, updated } = await retriever.sync(); console.log(chalk.green(`Sync complete: ${added} added, ${updated} updated`)); console.log(chalk.green(`Total documents: ${retriever.getDocumentCount()}`)); @@ -1274,7 +1274,7 @@ knowledge const query = queryParts.join(' '); console.log(chalk.blue(`Searching for: "${query}"`)); try { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever(); const results = await retriever.search(query, { limit: 10, typeFilter: options.type @@ -1359,7 +1359,7 @@ knowledge await copyFile(filePath, destPath); // Sync to update the index - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever(); await retriever.sync(); console.log(chalk.green(`Added: ${title}`)); @@ -1387,7 +1387,7 @@ knowledge console.log(chalk.blue(`Checking for content older than ${staleDays} days...`)); try { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever(); await retriever.sync(); // Get all documents directly from the store for accurate counts. @@ -1444,7 +1444,7 @@ knowledge console.log(chalk.blue('Knowledge Base Statistics:')); try { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever(); await retriever.sync(); const counts = retriever.getDocumentCountsByType(); diff --git a/src/cli/chat.tsx b/src/cli/chat.tsx index 5ffb00e..a31e523 100644 --- a/src/cli/chat.tsx +++ b/src/cli/chat.tsx @@ -16,7 +16,7 @@ import type { AgentEvent } from '../agent/types'; import { MarkdownText } from './components/markdown'; import { skillRegistry } from '../skills/registry'; import { getRuntimeTools } from './runtime-tools'; -import { createRetriever } from '../knowledge/retriever'; +import { createConfiguredRetriever } from '../knowledge/retriever'; import { createMemory, type ConversationMemory } from '../agent/conversation-memory'; const LOGO = ` @@ -91,7 +91,7 @@ export function ChatInterface() { await skillRegistry.loadUserSkills(); const runtimeSkills = skillRegistry.getAll().map((skill) => skill.id); const runtimeTools = await getRuntimeTools(config, toolRegistry.getAll()); - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever('.runbook', config); const newAgent = new Agent({ llm, diff --git a/src/eval/investigation-benchmark.ts b/src/eval/investigation-benchmark.ts index c019dc5..10a43d0 100644 --- a/src/eval/investigation-benchmark.ts +++ b/src/eval/investigation-benchmark.ts @@ -5,7 +5,7 @@ import { createLLMClient } from '../model/llm'; import { toolRegistry } from '../tools/registry'; import { skillRegistry } from '../skills/registry'; import { getRuntimeTools } from '../cli/runtime-tools'; -import { createRetriever } from '../knowledge/retriever'; +import { createConfiguredRetriever } from '../knowledge/retriever'; import { createOrchestrator, type InvestigationEvent, @@ -351,7 +351,7 @@ async function main() { availableTools: runtimeTools.map((tool) => tool.name), availableSkills: runtimeSkills, fetchRelevantRunbooks: async (ctx: RemediationContext) => { - const retriever = createRetriever(); + const retriever = await createConfiguredRetriever('.runbook', config); try { const searchQuery = [ctx.rootCause, ...ctx.affectedServices].join(' ').trim(); const results = await retriever.search(searchQuery || 'incident remediation', { diff --git a/src/integrations/hook-handlers.ts b/src/integrations/hook-handlers.ts index 754144c..990daa4 100644 --- a/src/integrations/hook-handlers.ts +++ b/src/integrations/hook-handlers.ts @@ -8,7 +8,7 @@ import { existsSync } from 'fs'; import { readFile, writeFile, mkdir } from 'fs/promises'; import { join } from 'path'; -import { createRetriever, KnowledgeRetriever } from '../knowledge/retriever/index'; +import { createConfiguredRetriever, KnowledgeRetriever } from '../knowledge/retriever/index'; import type { RetrievedKnowledge, RetrievedChunk } from '../knowledge/types'; /** @@ -259,7 +259,7 @@ export async function handleSessionStart( let knowledgeStats = ''; try { - retriever = createRetriever(config.baseDir); + retriever = await createConfiguredRetriever(config.baseDir); const counts = retriever.getDocumentCountsByType(); const total = Object.values(counts).reduce((sum, c) => sum + c, 0); @@ -326,7 +326,7 @@ export async function handleUserPromptSubmit( let retriever: KnowledgeRetriever | null = null; try { - retriever = createRetriever(config.baseDir); + retriever = await createConfiguredRetriever(config.baseDir); knowledge = await retriever.search(searchQuery, { serviceFilter: services.length > 0 ? services : undefined, limit: 10, diff --git a/src/knowledge/retriever/__tests__/index.test.ts b/src/knowledge/retriever/__tests__/index.test.ts new file mode 100644 index 0000000..7b7d624 --- /dev/null +++ b/src/knowledge/retriever/__tests__/index.test.ts @@ -0,0 +1,186 @@ +import { afterEach, describe, expect, it } from 'vitest'; +import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { stringify as stringifyYaml } from 'yaml'; +import { createConfiguredRetriever } from '../index'; + +async function createTempDir(): Promise { + return mkdtemp(join(tmpdir(), 'runbook-knowledge-')); +} + +async function writeMarkdownFile(path: string, content: string): Promise { + await writeFile(path, content, 'utf-8'); +} + +describe('KnowledgeRetriever configuration + ingestion', () => { + const createdDirs: string[] = []; + + afterEach(async () => { + await Promise.all( + createdDirs.splice(0).map(async (dir) => { + await rm(dir, { recursive: true, force: true }); + }) + ); + }); + + it('loads configured filesystem sources from baseDir config', async () => { + const baseDir = await createTempDir(); + createdDirs.push(baseDir); + + const sourceDir = join(baseDir, 'external-knowledge'); + await mkdir(sourceDir, { recursive: true }); + await writeMarkdownFile( + join(sourceDir, 'checkout-timeout.md'), + `--- +type: runbook +services: [checkout] +--- +# Checkout Timeout Recovery + +Restart workers and verify queue depth. +` + ); + + await writeFile( + join(baseDir, 'config.yaml'), + stringifyYaml({ + knowledge: { + sources: [ + { + type: 'filesystem', + path: sourceDir, + }, + ], + store: { + path: join(baseDir, 'knowledge.db'), + }, + }, + }), + 'utf-8' + ); + + const retriever = await createConfiguredRetriever(baseDir); + try { + const syncResult = await retriever.sync(); + expect(syncResult.added).toBe(1); + + const results = await retriever.search('checkout timeout'); + expect(results.runbooks.length).toBeGreaterThan(0); + expect(results.runbooks[0].title).toContain('Checkout Timeout Recovery'); + } finally { + retriever.close(); + } + }); + + it('falls back to default runbooks path when configured sources are invalid', async () => { + const baseDir = await createTempDir(); + createdDirs.push(baseDir); + + const fallbackRunbooksDir = join(baseDir, 'runbooks'); + await mkdir(fallbackRunbooksDir, { recursive: true }); + await writeMarkdownFile( + join(fallbackRunbooksDir, 'latency-guide.md'), + `--- +type: runbook +services: [api] +--- +# API Latency Guide + +Check connection pools and recent deploys. +` + ); + + await writeFile( + join(baseDir, 'config.yaml'), + stringifyYaml({ + knowledge: { + sources: [ + { + type: 'github', + }, + ], + store: { + path: join(baseDir, 'knowledge.db'), + }, + }, + }), + 'utf-8' + ); + + const retriever = await createConfiguredRetriever(baseDir); + try { + await retriever.sync(); + const results = await retriever.search('latency'); + expect(results.runbooks.length).toBeGreaterThan(0); + expect(results.runbooks[0].title).toContain('API Latency Guide'); + } finally { + retriever.close(); + } + }); + + it('uses configured retrieval.topK as the default search limit', async () => { + const baseDir = await createTempDir(); + createdDirs.push(baseDir); + + const sourceDir = join(baseDir, 'knowledge-source'); + await mkdir(sourceDir, { recursive: true }); + await writeMarkdownFile( + join(sourceDir, 'doc-a.md'), + `--- +type: runbook +services: [payments] +--- +# Payments Retry Runbook + +retry-strategy guidance +` + ); + await writeMarkdownFile( + join(sourceDir, 'doc-b.md'), + `--- +type: runbook +services: [payments] +--- +# Payments Queue Runbook + +retry-strategy fallback +` + ); + + await writeFile( + join(baseDir, 'config.yaml'), + stringifyYaml({ + knowledge: { + sources: [ + { + type: 'filesystem', + path: sourceDir, + }, + ], + store: { + path: join(baseDir, 'knowledge.db'), + }, + retrieval: { + topK: 1, + }, + }, + }), + 'utf-8' + ); + + const retriever = await createConfiguredRetriever(baseDir); + try { + await retriever.sync(); + const results = await retriever.search('retry-strategy'); + const total = + results.runbooks.length + + results.postmortems.length + + results.architecture.length + + results.knownIssues.length; + expect(total).toBe(1); + } finally { + retriever.close(); + } + }); +}); diff --git a/src/knowledge/retriever/index.ts b/src/knowledge/retriever/index.ts index 61afbd6..c6ce2a0 100644 --- a/src/knowledge/retriever/index.ts +++ b/src/knowledge/retriever/index.ts @@ -1,41 +1,74 @@ /** * Knowledge Retriever * - * Coordinates knowledge retrieval from multiple sources and the store. + * Coordinates knowledge retrieval from multiple sources and stores. + * Uses hybrid retrieval (FTS + vector) when embeddings are available. */ import { existsSync, mkdirSync } from 'fs'; -import { join } from 'path'; -import { KnowledgeStore } from '../store/sqlite'; +import { dirname, isAbsolute, join, resolve } from 'path'; +import { loadConfig, type Config } from '../../utils/config'; +import { configure as configureEmbedder, isEmbedderConfigured } from '../indexer/embedder'; import { loadFromSource } from '../sources'; +import { KnowledgeStore } from '../store/sqlite'; +import { VectorStore } from '../store/vector-store'; import type { RetrievedKnowledge, KnowledgeType, KnowledgeSourceConfig, FilesystemSourceConfig, KnowledgeDocument, + ApiSourceConfig, } from '../types'; +const DEFAULT_LIMIT = 20; +const DEFAULT_RRF_K = 60; +const DEFAULT_FTS_WEIGHT = 0.45; +const DEFAULT_VECTOR_WEIGHT = 0.55; + export interface RetrieverConfig { storePath: string; sources: KnowledgeSourceConfig[]; + vectorStorePath?: string; + defaultTopK?: number; + rerank?: boolean; + ftsWeight?: number; + vectorWeight?: number; + rrfK?: number; } export class KnowledgeRetriever { private store: KnowledgeStore; + private vectorStore: VectorStore | null = null; private config: RetrieverConfig; private initialized = false; + private vectorSearchEnabled = true; constructor(config: RetrieverConfig) { this.config = config; - // Ensure directory exists - const dir = join(config.storePath, '..'); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); + const storeDir = dirname(resolve(config.storePath)); + if (!existsSync(storeDir)) { + mkdirSync(storeDir, { recursive: true }); } this.store = new KnowledgeStore(config.storePath); + + if (isEmbedderConfigured()) { + const vectorPath = this.resolveVectorStorePath(config.storePath, config.vectorStorePath); + const vectorDir = dirname(resolve(vectorPath)); + if (!existsSync(vectorDir)) { + mkdirSync(vectorDir, { recursive: true }); + } + this.vectorStore = new VectorStore(vectorPath); + } + } + + private resolveVectorStorePath(storePath: string, explicitPath?: string): string { + if (explicitPath && explicitPath.trim().length > 0) { + return explicitPath; + } + return join(dirname(storePath), 'vectors.db'); } /** @@ -56,10 +89,13 @@ export class KnowledgeRetriever { } else { added++; } + this.store.upsertDocument(doc); + await this.indexDocumentEmbeddings(doc, existing); } - // Update lastSyncTime for incremental sync support + // Update lastSyncTime for incremental sync support. + // This is in-memory only until config persistence is added. if ('lastSyncTime' in source) { (source as { lastSyncTime?: string }).lastSyncTime = new Date().toISOString(); } @@ -69,6 +105,48 @@ export class KnowledgeRetriever { return { added, updated }; } + private async indexDocumentEmbeddings( + doc: KnowledgeDocument, + existing: KnowledgeDocument | null + ): Promise { + if (!this.vectorStore || !this.vectorSearchEnabled) { + return; + } + + const hasEmbeddings = this.vectorStore.hasDocument(doc.id); + const unchanged = + Boolean(existing) && + existing?.updatedAt === doc.updatedAt && + existing?.content === doc.content && + hasEmbeddings; + + if (unchanged) { + return; + } + + try { + this.vectorStore.deleteDocument(doc.id); + if (doc.chunks.length === 0) { + return; + } + + await this.vectorStore.addChunks( + doc.chunks.map((chunk) => ({ + chunk, + documentTitle: doc.title, + type: doc.type, + services: doc.services, + })) + ); + } catch (error) { + this.vectorSearchEnabled = false; + console.warn( + 'Vector indexing disabled for current process due to embedding/indexing error:', + error + ); + } + } + /** * Initialize if not already done */ @@ -88,16 +166,53 @@ export class KnowledgeRetriever { typeFilter?: KnowledgeType[]; serviceFilter?: string[]; limit?: number; + rerank?: boolean; } = {} ): Promise { await this.ensureInitialized(); - const allResults = this.store.search(query, { - ...options, - limit: options.limit || 20, + const trimmedQuery = query.trim(); + if (!trimmedQuery) { + return { + runbooks: [], + postmortems: [], + architecture: [], + knownIssues: [], + }; + } + + const limit = options.limit || this.config.defaultTopK || DEFAULT_LIMIT; + const rerank = options.rerank ?? this.config.rerank ?? true; + const fusionLimit = Math.max(limit, Math.min(limit * 3, 100)); + + const ftsResults = this.store.search(trimmedQuery, { + typeFilter: options.typeFilter, + serviceFilter: options.serviceFilter, + limit: fusionLimit, }); - // Organize by type + let allResults = ftsResults; + if (rerank && this.vectorStore && this.vectorSearchEnabled) { + try { + const vectorResults = await this.vectorStore.search(trimmedQuery, { + topK: fusionLimit, + typeFilter: options.typeFilter, + serviceFilter: options.serviceFilter, + minScore: 0.15, + }); + + if (vectorResults.length > 0) { + allResults = this.combineHybridResults(trimmedQuery, ftsResults, vectorResults, { + topK: limit, + serviceFilter: options.serviceFilter, + }); + } + } catch (error) { + this.vectorSearchEnabled = false; + console.warn('Vector search disabled for current process:', error); + } + } + const knowledge: RetrievedKnowledge = { runbooks: [], postmortems: [], @@ -105,7 +220,7 @@ export class KnowledgeRetriever { knownIssues: [], }; - for (const chunk of allResults) { + for (const chunk of allResults.slice(0, limit)) { switch (chunk.type) { case 'runbook': knowledge.runbooks.push(chunk); @@ -125,6 +240,100 @@ export class KnowledgeRetriever { return knowledge; } + private combineHybridResults( + query: string, + ftsResults: ReturnType, + vectorResults: Awaited>, + options: { + topK: number; + serviceFilter?: string[]; + } + ): ReturnType { + const rrfK = this.config.rrfK ?? DEFAULT_RRF_K; + const ftsWeight = this.config.ftsWeight ?? DEFAULT_FTS_WEIGHT; + const vectorWeight = this.config.vectorWeight ?? DEFAULT_VECTOR_WEIGHT; + const intentBoostByType = this.getIntentBoostByType(query); + + const merged = new Map< + string, + { + chunk: ReturnType[number]; + score: number; + } + >(); + + for (let i = 0; i < ftsResults.length; i++) { + const chunk = ftsResults[i]; + const contribution = ftsWeight * (1 / (rrfK + i + 1)); + const existing = merged.get(chunk.id); + if (existing) { + existing.score += contribution; + } else { + merged.set(chunk.id, { chunk, score: contribution }); + } + } + + for (let i = 0; i < vectorResults.length; i++) { + const chunk = vectorResults[i]; + const contribution = vectorWeight * (1 / (rrfK + i + 1)); + const existing = merged.get(chunk.id); + if (existing) { + existing.score += contribution; + } else { + merged.set(chunk.id, { chunk, score: contribution }); + } + } + + for (const value of merged.values()) { + const typeBoost = intentBoostByType.get(value.chunk.type) || 0; + value.score += typeBoost; + + if (options.serviceFilter && options.serviceFilter.length > 0) { + const overlap = options.serviceFilter.filter((service) => + value.chunk.services.includes(service) + ).length; + if (overlap > 0) { + value.score += Math.min(0.08, overlap * 0.02); + } + } + } + + return Array.from(merged.values()) + .sort((a, b) => b.score - a.score) + .slice(0, options.topK) + .map((item) => ({ + ...item.chunk, + score: item.score, + })); + } + + private getIntentBoostByType(query: string): Map { + const q = query.toLowerCase(); + const boost = new Map(); + + if (q.includes('runbook') || q.includes('playbook') || q.includes('how to')) { + boost.set('runbook', 0.06); + boost.set('playbook', 0.04); + } + if (q.includes('postmortem') || q.includes('incident') || q.includes('root cause')) { + boost.set('postmortem', 0.05); + boost.set('known_issue', 0.03); + } + if ( + q.includes('architecture') || + q.includes('dependency') || + q.includes('topology') || + q.includes('design') + ) { + boost.set('architecture', 0.06); + } + if (q.includes('known issue') || q.includes('workaround') || q.includes('mitigation')) { + boost.set('known_issue', 0.06); + } + + return boost; + } + /** * Get runbooks for specific services */ @@ -157,28 +366,23 @@ export class KnowledgeRetriever { } /** - * Close the store + * Close stores. */ close(): void { this.store.close(); + this.vectorStore?.close(); } } -/** - * Create a retriever with default configuration - */ -export function createRetriever(baseDir: string = '.runbook'): KnowledgeRetriever { - const storePath = join(baseDir, 'knowledge.db'); - +function buildDefaultSources(baseDir: string): FilesystemSourceConfig[] { const sources: FilesystemSourceConfig[] = [ { type: 'filesystem', path: join(baseDir, 'runbooks'), - filePatterns: ['**/*.md', '**/*.yaml'], + filePatterns: ['**/*.md', '**/*.yaml', '**/*.yml'], }, ]; - // Also check for examples if (existsSync('examples/runbooks')) { sources.push({ type: 'filesystem', @@ -187,5 +391,251 @@ export function createRetriever(baseDir: string = '.runbook'): KnowledgeRetrieve }); } - return new KnowledgeRetriever({ storePath, sources }); + return sources; +} + +function resolvePathLike(value: string): string { + if (isAbsolute(value)) { + return value; + } + return resolve(process.cwd(), value); +} + +function normalizeApiAuth(value: unknown): ApiSourceConfig['auth'] | undefined { + if (!value || typeof value !== 'object') { + return undefined; + } + const obj = value as Record; + const type = obj.type; + const authValue = obj.value; + if ( + (type === 'bearer' || type === 'basic' || type === 'header') && + typeof authValue === 'string' && + authValue.trim().length > 0 + ) { + return { + type, + value: authValue, + }; + } + + // Backward compatibility: treat legacy auth.apiToken as bearer token. + const legacyToken = obj.apiToken; + if (typeof legacyToken === 'string' && legacyToken.trim().length > 0) { + return { + type: 'bearer', + value: legacyToken, + }; + } + + // Backward compatibility: treat legacy auth header as "Header: value". + const legacyEmail = obj.email; + if (typeof legacyEmail === 'string' && legacyEmail.trim().length > 0) { + return { + type: 'header', + value: legacyEmail, + }; + } + + return undefined; +} + +function readOptionalStringField(source: Record, key: string): string | undefined { + const value = source[key]; + if (typeof value === 'string' && value.trim().length > 0) { + return value; + } + return undefined; +} + +function normalizeConfiguredSources( + sources: Config['knowledge']['sources'], + baseDir: string +): KnowledgeSourceConfig[] { + const normalized: KnowledgeSourceConfig[] = []; + + for (const source of sources) { + switch (source.type) { + case 'filesystem': { + if (!source.path || source.path.trim().length === 0) { + console.warn('Skipping filesystem knowledge source without path.'); + continue; + } + normalized.push({ + type: 'filesystem', + path: resolvePathLike(source.path), + filePatterns: ['**/*.md', '**/*.yaml', '**/*.yml'], + watch: source.watch, + }); + break; + } + + case 'confluence': { + const auth = source.auth; + if (!source.baseUrl || !source.spaceKey || !auth?.email || !auth?.apiToken) { + console.warn('Skipping incomplete Confluence knowledge source.'); + continue; + } + normalized.push({ + type: 'confluence', + baseUrl: source.baseUrl, + spaceKey: source.spaceKey, + labels: source.labels, + auth: { + email: auth.email, + apiToken: auth.apiToken, + }, + lastSyncTime: source.lastSyncTime, + }); + break; + } + + case 'google_drive': { + if ( + !source.folderIds || + source.folderIds.length === 0 || + !source.clientId || + !source.clientSecret + ) { + console.warn('Skipping incomplete Google Drive knowledge source.'); + continue; + } + normalized.push({ + type: 'google_drive', + folderIds: source.folderIds, + clientId: source.clientId, + clientSecret: source.clientSecret, + refreshToken: source.refreshToken, + mimeTypes: source.mimeTypes, + includeSubfolders: source.includeSubfolders, + lastSyncTime: source.lastSyncTime, + }); + break; + } + + case 'notion': { + const sourceRecord = source as Record; + const notionApiKey = + readOptionalStringField(sourceRecord, 'apiKey') || + source.auth?.apiToken || + process.env.RUNBOOK_NOTION_API_KEY || + process.env.NOTION_API_KEY; + + if (!source.databaseId || !notionApiKey) { + console.warn('Skipping incomplete Notion knowledge source.'); + continue; + } + normalized.push({ + type: 'notion', + databaseId: source.databaseId, + apiKey: notionApiKey, + }); + break; + } + + case 'github': { + if (!source.repo) { + console.warn('Skipping incomplete GitHub knowledge source.'); + continue; + } + const sourceRecord = source as Record; + const explicitToken = readOptionalStringField(sourceRecord, 'token'); + normalized.push({ + type: 'github', + repo: source.repo, + branch: source.branch || 'main', + path: source.path || '', + token: + explicitToken || + source.auth?.apiToken || + process.env.RUNBOOK_GITHUB_TOKEN || + process.env.GITHUB_TOKEN, + }); + break; + } + + case 'api': { + if (!source.endpoint) { + console.warn('Skipping API knowledge source without endpoint.'); + continue; + } + normalized.push({ + type: 'api', + endpoint: source.endpoint, + auth: normalizeApiAuth(source.auth), + }); + break; + } + + default: { + console.warn(`Unsupported knowledge source type: ${source.type}`); + break; + } + } + } + + if (normalized.length === 0) { + return buildDefaultSources(baseDir); + } + + return normalized; +} + +function resolveConfigPathForBaseDir(baseDir: string): string | undefined { + const yamlPath = join(baseDir, 'config.yaml'); + if (existsSync(yamlPath)) { + return yamlPath; + } + const ymlPath = join(baseDir, 'config.yml'); + if (existsSync(ymlPath)) { + return ymlPath; + } + return undefined; +} + +/** + * Create a retriever with default local filesystem sources. + */ +export function createRetriever(baseDir: string = '.runbook'): KnowledgeRetriever { + const storePath = join(baseDir, 'knowledge.db'); + return new KnowledgeRetriever({ + storePath, + vectorStorePath: join(baseDir, 'vectors.db'), + sources: buildDefaultSources(baseDir), + }); +} + +/** + * Create a retriever from runtime config when available. + */ +export async function createConfiguredRetriever( + baseDir: string = '.runbook', + runtimeConfig?: Config +): Promise { + const configPath = resolveConfigPathForBaseDir(baseDir); + const config = runtimeConfig ?? (await loadConfig(configPath)); + + const embedderApiKey = + process.env.OPENAI_API_KEY || + (config.llm.provider === 'openai' ? (config.llm.apiKey ?? undefined) : undefined); + if (embedderApiKey) { + configureEmbedder(embedderApiKey, { + model: config.knowledge.store.embeddingModel, + }); + } + + const configuredStorePath = config.knowledge.store.path?.trim(); + const storePath = configuredStorePath + ? resolvePathLike(configuredStorePath) + : resolvePathLike(join(baseDir, 'knowledge.db')); + const vectorStorePath = join(dirname(storePath), 'vectors.db'); + const sources = normalizeConfiguredSources(config.knowledge.sources, baseDir); + + return new KnowledgeRetriever({ + storePath, + vectorStorePath, + sources, + defaultTopK: config.knowledge.retrieval.topK, + rerank: config.knowledge.retrieval.rerank, + }); } diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 1ea53ca..8eb4c93 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -5,7 +5,7 @@ * to Claude Code via the Model Context Protocol. */ -import { createRetriever, KnowledgeRetriever } from '../knowledge/retriever/index'; +import { createConfiguredRetriever, KnowledgeRetriever } from '../knowledge/retriever/index'; import type { RetrievedChunk, KnowledgeType } from '../knowledge/types'; /** @@ -386,6 +386,7 @@ async function handleListServices( export class MCPServer { private config: MCPServerConfig; private retriever: KnowledgeRetriever | null = null; + private retrieverPromise: Promise | null = null; constructor(config: Partial = {}) { this.config = { ...DEFAULT_CONFIG, ...config }; @@ -401,9 +402,12 @@ export class MCPServer { /** * Initialize the retriever */ - private getRetriever(): KnowledgeRetriever { + private async getRetriever(): Promise { if (!this.retriever) { - this.retriever = createRetriever(this.config.baseDir); + if (!this.retrieverPromise) { + this.retrieverPromise = createConfiguredRetriever(this.config.baseDir); + } + this.retriever = await this.retrieverPromise; } return this.retriever; } @@ -412,7 +416,7 @@ export class MCPServer { * Handle a tool call */ async handleToolCall(request: MCPToolCallRequest): Promise { - const retriever = this.getRetriever(); + const retriever = await this.getRetriever(); try { switch (request.name) { diff --git a/src/slack/gateway.ts b/src/slack/gateway.ts index ecb2d35..9d7e9eb 100644 --- a/src/slack/gateway.ts +++ b/src/slack/gateway.ts @@ -4,7 +4,7 @@ import { Agent } from '../agent/agent'; import { createLLMClient } from '../model/llm'; import { toolRegistry } from '../tools/registry'; import { skillRegistry } from '../skills/registry'; -import { createRetriever } from '../knowledge/retriever'; +import { createConfiguredRetriever } from '../knowledge/retriever'; import { loadConfig, type Config } from '../utils/config'; import { getRuntimeTools } from '../cli/runtime-tools'; import { configure as configureSlack, postMessage } from '../tools/incident/slack'; @@ -257,8 +257,8 @@ function verifySlackSignature( } } -function createAgentKnowledgeRetriever() { - const retriever = createRetriever(); +async function createAgentKnowledgeRetriever(config: Config) { + const retriever = await createConfiguredRetriever('.runbook', config); return { retrieve: async (context: { @@ -300,7 +300,7 @@ async function createRuntimeAgent(config: Config): Promise { llm, tools: runtimeTools, skills: runtimeSkills, - knowledgeRetriever: createAgentKnowledgeRetriever(), + knowledgeRetriever: await createAgentKnowledgeRetriever(config), config: { maxIterations: config.agent.maxIterations, maxHypothesisDepth: config.agent.maxHypothesisDepth, diff --git a/src/tools/registry.ts b/src/tools/registry.ts index ab3ef11..5ce3293 100644 --- a/src/tools/registry.ts +++ b/src/tools/registry.ts @@ -49,7 +49,7 @@ import { getQuickHealthCheck, COMMON_QUERIES, } from './observability/prometheus'; -import { createRetriever } from '../knowledge/retriever'; +import { createConfiguredRetriever } from '../knowledge/retriever'; import { AWS_SERVICES, getServiceById, @@ -775,13 +775,13 @@ async function executeAwsMutation( } // Global retriever instance -let retriever: ReturnType | null = null; +let retrieverPromise: ReturnType | null = null; function getRetriever() { - if (!retriever) { - retriever = createRetriever(); + if (!retrieverPromise) { + retrieverPromise = createConfiguredRetriever(); } - return retriever; + return retrieverPromise; } /** @@ -822,7 +822,7 @@ export const searchKnowledgeTool = defineTool( }, async (args) => { try { - const r = getRetriever(); + const r = await getRetriever(); const results = await r.search(args.query as string, { typeFilter: args.type_filter as | Array<'runbook' | 'postmortem' | 'architecture' | 'known_issue'>