From 7308cb82d1f61edb3a5c6e86e0087b943909eeaf Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 04:16:16 +0000 Subject: [PATCH 1/5] feat(memory): hybrid retrieval, trust feedback, and schema hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inspired by Hermes Agent's Holographic memory provider, this adds three interlocking improvements to the cortex-engine cognitive loop: 1. Hybrid lexical+vector retrieval (FTS5 on SQLite, token-overlap fallback on JSON/Firestore). The `query` tool now merges BM25 full-text hits into the semantic candidate set, re-scoring lexical-only hits by cosine before ranking. Exact IDs, proper nouns, and rare terms that embeddings miss are now surfaced. Enabled by default; pass `lexical: false` to opt out. 2. Asymmetric trust feedback tool. New `feedback` tool lets agents close the retrieval loop: helpful memories gain 0.05 confidence, unhelpful ones lose 0.10. The asymmetry mirrors Hermes' holographic trust scoring — bad retrievals decay out of rankings faster than good ones earn their way in. Every event is logged to `feedback_log` for `retrieval_audit`. 3. Schema hardening on SQLite. `last_retrieval_score`, `last_hop_count`, and `memory_origin` are now first-class persisted columns (with migration shims for existing DBs). The FTS5 external-content index is kept in sync by triggers. Six missing indexes added (two on edges; one each on observations, memories, ops, and beliefs). `recursive_triggers = ON` is set so that INSERT OR REPLACE correctly fires the FTS delete trigger on upserts. 
https://claude.ai/code/session_01DAZ3GzRri9hqxkTyqmSpc4 --- src/core/store.ts | 10 +++ src/engines/memory.ts | 2 +- src/mcp/tools.ts | 2 + src/namespace/scoped-store.ts | 4 ++ src/stores/_lexical.ts | 69 ++++++++++++++++++++ src/stores/firestore.ts | 31 +++++++++ src/stores/json.ts | 5 ++ src/stores/sqlite.ts | 117 ++++++++++++++++++++++++++++++++-- src/tools/feedback.ts | 88 +++++++++++++++++++++++++ src/tools/query.ts | 29 +++++++++ 10 files changed, 349 insertions(+), 8 deletions(-) create mode 100644 src/stores/_lexical.ts create mode 100644 src/tools/feedback.ts diff --git a/src/core/store.ts b/src/core/store.ts index f2f14b3..eae0acd 100644 --- a/src/core/store.ts +++ b/src/core/store.ts @@ -54,6 +54,16 @@ export interface CortexStore { /** Find k nearest memories by embedding vector. Returns sorted by similarity desc. */ findNearest(embedding: number[], limit: number): Promise; + /** + * Lexical full-text search over memory name/definition/tags. Complements + * findNearest: catches exact-keyword matches that embeddings miss (IDs, + * proper nouns, rare terms). SQLite uses FTS5/BM25; JSON and Firestore + * fall back to token-overlap scoring. Scores are normalized to 0-1 but are + * NOT comparable to cosine similarity — rank order is the contract. + * Faded memories are excluded. An empty or stopword-only query returns []. + */ + searchText(text: string, limit: number): Promise; + /** Increment access_count, update last_accessed and FSRS fields. */ touchMemory(id: string, fsrsUpdates: Partial): Promise; diff --git a/src/engines/memory.ts b/src/engines/memory.ts index 94092e0..1528c45 100644 --- a/src/engines/memory.ts +++ b/src/engines/memory.ts @@ -127,7 +127,7 @@ export async function hydeExpand( * Compute cosine similarity between two equal-length vectors. * Returns 0 if either vector is zero-length. 
*/ -function cosineSimilarity(a: number[], b: number[]): number { +export function cosineSimilarity(a: number[], b: number[]): number { let dot = 0, normA = 0, normB = 0; for (let i = 0; i < a.length; i++) { dot += a[i] * b[i]; diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index dd38848..8735328 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -17,6 +17,7 @@ import type { FederationClient } from '../federation/client.js'; // ─── Tool imports ──────────────────────────────────────────────────────────── import { queryTool } from '../tools/query.js'; +import { feedbackTool } from '../tools/feedback.js'; import { observeTool } from '../tools/observe.js'; import { wonderTool } from '../tools/wonder.js'; import { speculateTool } from '../tools/speculate.js'; @@ -206,6 +207,7 @@ export function createTools(): ToolDefinition[] { return [ // Core cognitive tools queryTool, + feedbackTool, observeTool, wonderTool, speculateTool, diff --git a/src/namespace/scoped-store.ts b/src/namespace/scoped-store.ts index 85aa9bd..9dd552a 100644 --- a/src/namespace/scoped-store.ts +++ b/src/namespace/scoped-store.ts @@ -46,6 +46,10 @@ export class ScopedStore implements CortexStore { return this.inner.findNearest(embedding, limit); } + searchText(text: string, limit: number): Promise { + return this.inner.searchText(text, limit); + } + touchMemory(id: string, fsrsUpdates: Partial): Promise { return this.inner.touchMemory(id, fsrsUpdates); } diff --git a/src/stores/_lexical.ts b/src/stores/_lexical.ts new file mode 100644 index 0000000..c9bfd84 --- /dev/null +++ b/src/stores/_lexical.ts @@ -0,0 +1,69 @@ +/** + * Shared lexical-search fallback for stores without native full-text search + * (JsonCortexStore, FirestoreCortexStore). SqliteCortexStore uses FTS5/BM25 + * instead. Scoring is simple weighted token overlap — good enough to surface + * exact-keyword matches that embedding search misses. 
+ */ + +import type { Memory, MemorySummary, SearchResult } from '../core/types.js'; + +function toSummary(m: Memory): MemorySummary { + return { + id: m.id, + name: m.name, + definition: m.definition, + category: m.category, + salience: m.salience, + confidence: m.confidence, + access_count: m.access_count, + updated_at: m.updated_at, + tags: m.tags, + fsrs: m.fsrs, + provenance: m.provenance, + }; +} + +/** Lowercase alphanumeric tokens, 2+ chars. */ +export function tokenize(text: string): string[] { + return (text.toLowerCase().match(/[a-z0-9_]{2,}/g) ?? []); +} + +/** + * Score one memory against query tokens. Name hits weigh 2.0, tag hits 1.5, + * definition hits 1.0. Normalized by token count so the score stays in 0-1 + * (a token matching name+tag+definition still counts once, at max weight). + */ +function scoreMemory(memory: Memory, tokens: string[]): number { + const name = memory.name.toLowerCase(); + const definition = memory.definition.toLowerCase(); + const tags = memory.tags.map((t) => t.toLowerCase()); + + let total = 0; + for (const token of tokens) { + if (name.includes(token)) total += 2.0; + else if (tags.some((t) => t.includes(token))) total += 1.5; + else if (definition.includes(token)) total += 1.0; + } + return total / (tokens.length * 2.0); +} + +/** + * Rank `memories` by lexical overlap with `text`. Skips faded memories and + * zero-score candidates. Returns at most `limit` results, best first. 
+ */ +export function lexicalSearch(memories: Memory[], text: string, limit: number): SearchResult[] { + const tokens = tokenize(text); + if (tokens.length === 0) return []; + + return memories + .filter((m) => !m.faded) + .map((m) => ({ memory: m, score: scoreMemory(m, tokens) })) + .filter((r) => r.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, limit) + .map(({ memory, score }) => ({ + memory: toSummary(memory), + score, + distance: 1 - score, + })); +} diff --git a/src/stores/firestore.ts b/src/stores/firestore.ts index fd4fcdd..307308f 100644 --- a/src/stores/firestore.ts +++ b/src/stores/firestore.ts @@ -14,6 +14,7 @@ import { randomUUID } from 'node:crypto'; import type { CortexStore, StoreCapabilities } from '../core/store.js'; import { CORTEX_STORE_SCHEMA_VERSION } from '../core/store.js'; import { validateNamespace } from './_validate.js'; +import { lexicalSearch } from './_lexical.js'; import type { Memory, MemorySummary, @@ -118,6 +119,9 @@ function docToMemory(id: string, data: DocumentData): Memory { faded: data.faded ?? false, salience_original: data.salience_original ?? undefined, provenance: docProvenance(data), + last_retrieval_score: data.last_retrieval_score ?? undefined, + last_hop_count: data.last_hop_count ?? undefined, + memory_origin: data.memory_origin ?? undefined, }; } @@ -281,6 +285,9 @@ export class FirestoreCortexStore implements CortexStore { faded: memory.faded ?? false, salience_original: memory.salience_original ?? null, provenance: provenanceData(memory.provenance) ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); return ref.id; } @@ -318,6 +325,9 @@ export class FirestoreCortexStore implements CortexStore { if (updates.provenance !== undefined) { data.provenance = provenanceData(updates.provenance) ?? 
null; } + if (updates.last_retrieval_score !== undefined) data.last_retrieval_score = updates.last_retrieval_score; + if (updates.last_hop_count !== undefined) data.last_hop_count = updates.last_hop_count; + if (updates.memory_origin !== undefined) data.memory_origin = updates.memory_origin; if (Object.keys(data).length === 0) return; await this.col('memories').doc(id).update(data); @@ -346,6 +356,14 @@ export class FirestoreCortexStore implements CortexStore { }); } + async searchText(text: string, limit: number): Promise { + // Firestore has no native full-text search; fall back to a token-overlap + // scan over all memories. Acceptable at cortex-engine scale (<10k); swap + // in an external search index if collections grow beyond that. + const memories = await this.getAllMemories(); + return lexicalSearch(memories, text, limit); + } + async touchMemory(id: string, fsrsUpdates: Partial): Promise { const data: Record = { access_count: _FieldValue!.increment(1), @@ -677,6 +695,9 @@ export class FirestoreCortexStore implements CortexStore { faded: memory.faded ?? false, salience_original: memory.salience_original ?? null, provenance: provenanceData(memory.provenance) ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); } @@ -831,6 +852,9 @@ class FirestoreTxnProxy implements CortexStore { faded: memory.faded ?? false, salience_original: memory.salience_original ?? null, provenance: provenanceData(memory.provenance) ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); return id; } @@ -867,11 +891,15 @@ class FirestoreTxnProxy implements CortexStore { if (updates.provenance !== undefined) { data.provenance = provenanceData(updates.provenance) ?? 
null; } + if (updates.last_retrieval_score !== undefined) data.last_retrieval_score = updates.last_retrieval_score; + if (updates.last_hop_count !== undefined) data.last_hop_count = updates.last_hop_count; + if (updates.memory_origin !== undefined) data.memory_origin = updates.memory_origin; if (Object.keys(data).length === 0) return; this.txn.update(this.col('memories').doc(id), data); } findNearest(): Promise { return this.unsupported('findNearest'); } + searchText(): Promise { return this.unsupported('searchText'); } async touchMemory(id: string, fsrsUpdates: Partial): Promise { const data: Record = { @@ -1068,6 +1096,9 @@ class FirestoreTxnProxy implements CortexStore { faded: memory.faded ?? false, salience_original: memory.salience_original ?? null, provenance: provenanceData(memory.provenance) ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); } diff --git a/src/stores/json.ts b/src/stores/json.ts index 1793703..2b1a782 100644 --- a/src/stores/json.ts +++ b/src/stores/json.ts @@ -14,6 +14,7 @@ import { randomUUID } from 'node:crypto'; import type { CortexStore, StoreCapabilities } from '../core/store.js'; import { CORTEX_STORE_SCHEMA_VERSION } from '../core/store.js'; import { validateNamespace } from './_validate.js'; +import { lexicalSearch } from './_lexical.js'; import type { Memory, MemorySummary, @@ -192,6 +193,10 @@ export class JsonCortexStore implements CortexStore { .slice(0, limit); } + async searchText(text: string, limit: number): Promise { + return lexicalSearch(Object.values(this.data.memories).map(m => clone(m)), text, limit); + } + async touchMemory(id: string, fsrsUpdates: Partial): Promise { const m = this.data.memories[id]; if (!m) return; diff --git a/src/stores/sqlite.ts b/src/stores/sqlite.ts index 4ab4fdf..abbe524 100644 --- a/src/stores/sqlite.ts +++ b/src/stores/sqlite.ts @@ -94,6 +94,8 @@ interface MemoryRow { 
faded: number; salience_original: number | null; prov_model_id: string | null; prov_model_family: string | null; prov_client: string | null; prov_agent: string | null; + last_retrieval_score: number | null; last_hop_count: number | null; + memory_origin: string | null; } interface ObservationRow { @@ -146,6 +148,9 @@ function rowToMemory(r: MemoryRow): Memory { }, faded: r.faded === 1, salience_original: r.salience_original ?? undefined, provenance: prov(r), + last_retrieval_score: r.last_retrieval_score ?? undefined, + last_hop_count: r.last_hop_count ?? undefined, + memory_origin: (r.memory_origin as Memory['memory_origin']) ?? undefined, }; } @@ -222,7 +227,8 @@ const SCHEMAS: Record = { fsrs_reps INTEGER NOT NULL DEFAULT 0, fsrs_lapses INTEGER NOT NULL DEFAULT 0, fsrs_state TEXT NOT NULL DEFAULT 'new', fsrs_last_review TEXT, faded INTEGER DEFAULT 0, salience_original REAL, - prov_model_id TEXT, prov_model_family TEXT, prov_client TEXT, prov_agent TEXT + prov_model_id TEXT, prov_model_family TEXT, prov_client TEXT, prov_agent TEXT, + last_retrieval_score REAL, last_hop_count INTEGER, memory_origin TEXT )`, observations: `CREATE TABLE IF NOT EXISTS %T ( id TEXT PRIMARY KEY, content TEXT NOT NULL, @@ -294,6 +300,10 @@ export class SqliteCortexStore implements CortexStore { // deadlock. See docs/concurrency.md. this.db.pragma('busy_timeout = 5000'); this.db.pragma('foreign_keys = ON'); + // INSERT OR REPLACE only fires delete triggers when recursive_triggers is + // on. Without it, upsert* leaves stale rows in the external-content FTS + // index (which is kept in sync purely by triggers). + this.db.pragma('recursive_triggers = ON'); this.ns = namespace ?? ''; this.createTables(); } @@ -307,13 +317,23 @@ export class SqliteCortexStore implements CortexStore { this.db.exec(sql.replace('%T', this.t(name))); } this.migrateSchema(); + this.createIndexes(); + this.createFtsTable(); } /** Add columns introduced after initial schema. 
Safe to run repeatedly (no-ops on new DBs). */ private migrateSchema(): void { - const obsTable = this.t('observations'); + this.addColumn(this.t('observations'), `content_type TEXT DEFAULT 'declarative'`); + // Retrieval feedback fields consumed by the dream pipeline's FSRS rating + // (see engines/cognition.ts). Older schemas dropped these silently. + this.addColumn(this.t('memories'), 'last_retrieval_score REAL'); + this.addColumn(this.t('memories'), 'last_hop_count INTEGER'); + this.addColumn(this.t('memories'), 'memory_origin TEXT'); + } + + private addColumn(table: string, columnDef: string): void { try { - this.db.exec(`ALTER TABLE ${obsTable} ADD COLUMN content_type TEXT DEFAULT 'declarative'`); + this.db.exec(`ALTER TABLE ${table} ADD COLUMN ${columnDef}`); } catch (err) { // better-sqlite3 surfaces ALTER TABLE errors via the `code` property. // SQLITE_ERROR with "duplicate column" / "already exists" is the @@ -326,6 +346,51 @@ export class SqliteCortexStore implements CortexStore { } } + private createIndexes(): void { + const idx = (name: string, table: string, cols: string) => + this.db.exec(`CREATE INDEX IF NOT EXISTS ${this.t(name)} ON ${this.t(table)} (${cols})`); + + idx('idx_edges_source', 'edges', 'source_id'); + idx('idx_edges_target', 'edges', 'target_id'); + idx('idx_obs_processed', 'observations', 'processed, created_at'); + idx('idx_memories_updated', 'memories', 'updated_at'); + idx('idx_ops_created', 'ops', 'created_at'); + idx('idx_beliefs_concept', 'beliefs', 'concept_id'); + } + + /** + * External-content FTS5 index over memory name/definition/tags, kept in + * sync by triggers. On first creation against a non-empty memories table, + * the index is rebuilt from existing rows. 
+ */ + private createFtsTable(): void { + const mem = this.t('memories'); + const fts = this.t('memories_fts'); + + const exists = this.db.prepare( + `SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?` + ).get(fts) !== undefined; + + this.db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS ${fts} USING fts5( + name, definition, tags, content='${mem}', content_rowid='rowid' + )`); + + this.db.exec(`CREATE TRIGGER IF NOT EXISTS ${this.t('memories_fts_ai')} AFTER INSERT ON ${mem} BEGIN + INSERT INTO ${fts}(rowid, name, definition, tags) VALUES (new.rowid, new.name, new.definition, new.tags); + END`); + this.db.exec(`CREATE TRIGGER IF NOT EXISTS ${this.t('memories_fts_ad')} AFTER DELETE ON ${mem} BEGIN + INSERT INTO ${fts}(${fts}, rowid, name, definition, tags) VALUES ('delete', old.rowid, old.name, old.definition, old.tags); + END`); + this.db.exec(`CREATE TRIGGER IF NOT EXISTS ${this.t('memories_fts_au')} AFTER UPDATE ON ${mem} BEGIN + INSERT INTO ${fts}(${fts}, rowid, name, definition, tags) VALUES ('delete', old.rowid, old.name, old.definition, old.tags); + INSERT INTO ${fts}(rowid, name, definition, tags) VALUES (new.rowid, new.name, new.definition, new.tags); + END`); + + if (!exists) { + this.db.exec(`INSERT INTO ${fts}(${fts}) VALUES ('rebuild')`); + } + } + // ─── Memory ──────────────────────────────────────────────────────────────── async putMemory(memory: Omit): Promise { @@ -334,12 +399,14 @@ export class SqliteCortexStore implements CortexStore { id, name, definition, category, salience, confidence, access_count, created_at, updated_at, last_accessed, source_files, embedding, tags, fsrs_stability, fsrs_difficulty, fsrs_reps, fsrs_lapses, fsrs_state, fsrs_last_review, - faded, salience_original, prov_model_id, prov_model_family, prov_client, prov_agent + faded, salience_original, prov_model_id, prov_model_family, prov_client, prov_agent, + last_retrieval_score, last_hop_count, memory_origin ) VALUES ( @id, @name, @definition, @category, @salience, 
@confidence, @access_count, @created_at, @updated_at, @last_accessed, @source_files, @embedding, @tags, @fsrs_stability, @fsrs_difficulty, @fsrs_reps, @fsrs_lapses, @fsrs_state, @fsrs_last_review, - @faded, @salience_original, @prov_model_id, @prov_model_family, @prov_client, @prov_agent + @faded, @salience_original, @prov_model_id, @prov_model_family, @prov_client, @prov_agent, + @last_retrieval_score, @last_hop_count, @memory_origin )`).run({ id, name: memory.name, definition: memory.definition, category: memory.category, salience: memory.salience, @@ -358,6 +425,9 @@ export class SqliteCortexStore implements CortexStore { prov_model_family: memory.provenance?.model_family ?? null, prov_client: memory.provenance?.client ?? null, prov_agent: memory.provenance?.agent ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); return id; } @@ -397,6 +467,9 @@ export class SqliteCortexStore implements CortexStore { vals.pmi = updates.provenance.model_id; vals.pmf = updates.provenance.model_family; vals.pc = updates.provenance.client; vals.pa = updates.provenance.agent; } + if (updates.last_retrieval_score !== undefined) { sets.push('last_retrieval_score = @lrs'); vals.lrs = updates.last_retrieval_score; } + if (updates.last_hop_count !== undefined) { sets.push('last_hop_count = @lhc'); vals.lhc = updates.last_hop_count; } + if (updates.memory_origin !== undefined) { sets.push('memory_origin = @mo'); vals.mo = updates.memory_origin; } if (sets.length === 0) return; this.db.prepare(`UPDATE ${this.t('memories')} SET ${sets.join(', ')} WHERE id = @id`).run(vals); } @@ -422,6 +495,31 @@ export class SqliteCortexStore implements CortexStore { })); } + async searchText(text: string, limit: number): Promise { + // Quote each token so user input can't break MATCH syntax (operators, + // unbalanced quotes). OR-join for recall; BM25 handles ranking. 
+ const tokens = text.toLowerCase().match(/[a-z0-9_]{2,}/g) ?? []; + if (tokens.length === 0) return []; + const match = tokens.map((tok) => `"${tok}"`).join(' OR '); + + const rows = this.db.prepare( + `SELECT m.*, bm25(${this.t('memories_fts')}) AS fts_rank + FROM ${this.t('memories_fts')} f + JOIN ${this.t('memories')} m ON m.rowid = f.rowid + WHERE ${this.t('memories_fts')} MATCH ? AND m.faded = 0 + ORDER BY fts_rank + LIMIT ?` + ).all(match, limit) as Array; + + // bm25() is smaller-is-better (negative for matches). Map its magnitude + // to 0-1 monotonically: x/(x+1). Rank order is the contract, not the value. + return rows.map((row) => { + const x = Math.max(0, -row.fts_rank); + const score = x / (x + 1); + return { memory: rowToSummary(row), score, distance: 1 - score }; + }); + } + async touchMemory(id: string, fsrsUpdates: Partial): Promise { const now = new Date().toISOString(); const sets: string[] = ['access_count = access_count + 1', 'last_accessed = @now', 'updated_at = @now']; @@ -779,12 +877,14 @@ export class SqliteCortexStore implements CortexStore { id, name, definition, category, salience, confidence, access_count, created_at, updated_at, last_accessed, source_files, embedding, tags, fsrs_stability, fsrs_difficulty, fsrs_reps, fsrs_lapses, fsrs_state, fsrs_last_review, - faded, salience_original, prov_model_id, prov_model_family, prov_client, prov_agent + faded, salience_original, prov_model_id, prov_model_family, prov_client, prov_agent, + last_retrieval_score, last_hop_count, memory_origin ) VALUES ( @id, @name, @definition, @category, @salience, @confidence, @access_count, @created_at, @updated_at, @last_accessed, @source_files, @embedding, @tags, @fsrs_stability, @fsrs_difficulty, @fsrs_reps, @fsrs_lapses, @fsrs_state, @fsrs_last_review, - @faded, @salience_original, @prov_model_id, @prov_model_family, @prov_client, @prov_agent + @faded, @salience_original, @prov_model_id, @prov_model_family, @prov_client, @prov_agent, + 
@last_retrieval_score, @last_hop_count, @memory_origin )`).run({ id: memory.id, name: memory.name, definition: memory.definition, category: memory.category, salience: memory.salience, @@ -803,6 +903,9 @@ export class SqliteCortexStore implements CortexStore { prov_model_family: memory.provenance?.model_family ?? null, prov_client: memory.provenance?.client ?? null, prov_agent: memory.provenance?.agent ?? null, + last_retrieval_score: memory.last_retrieval_score ?? null, + last_hop_count: memory.last_hop_count ?? null, + memory_origin: memory.memory_origin ?? null, }); } diff --git a/src/tools/feedback.ts b/src/tools/feedback.ts new file mode 100644 index 0000000..0c9bd75 --- /dev/null +++ b/src/tools/feedback.ts @@ -0,0 +1,88 @@ +/** + * feedback — close the retrieval loop with asymmetric trust scoring. + * + * Pattern borrowed from Hermes Agent's holographic memory provider: helpful + * retrievals nudge trust up gently (+0.05), unhelpful ones cut it harder + * (-0.10). Asymmetry matters — one bad retrieval should cost more than one + * good retrieval earns, so polluted memories decay out of top ranks quickly. + * + * Confidence is the trust signal here: it already feeds composite ranking + * and consolidation decisions. Every event is also logged to feedback_log + * so retrieval_audit can correlate feedback with retrieval traces. 
+ */ + +import type { ToolDefinition } from '../mcp/tools.js'; +import { str, optStr } from './_helpers.js'; + +const HELPFUL_DELTA = 0.05; +const UNHELPFUL_DELTA = -0.10; +const CONFIDENCE_FLOOR = 0.05; +const CONFIDENCE_CEIL = 1.0; + +export const feedbackTool: ToolDefinition = { + name: 'feedback', + category: 'memory', + description: 'Records whether a retrieved memory was actually helpful, adjusting its confidence asymmetrically (+0.05 helpful / -0.10 unhelpful) and logging the event for retrieval audits.', + whenToUse: 'You just acted on a retrieved memory and know whether it was accurate and useful — close the loop so future ranking improves.', + doNotUse: 'You want to correct a memory definition (use believe) or remove it entirely (use forget).', + inputSchema: { + type: 'object', + properties: { + id: { type: 'string', description: 'Memory id the feedback applies to' }, + helpful: { type: 'boolean', description: 'true if the memory was accurate and useful, false if wrong, stale, or misleading' }, + note: { type: 'string', description: 'Optional context — what made it helpful or unhelpful' }, + namespace: { type: 'string', description: 'Memory namespace (defaults to default)' }, + }, + required: ['id', 'helpful'], + }, + async handler(args, ctx) { + const id = str(args, 'id'); + const helpful = args['helpful']; + if (typeof helpful !== 'boolean') { + throw new Error('Missing required boolean argument: helpful'); + } + const note = optStr(args, 'note'); + const namespace = optStr(args, 'namespace'); + + const store = ctx.namespaces.getStore(namespace); + + const memory = await store.getMemory(id); + if (!memory) { + return { error: `Memory not found: ${id}` }; + } + + const delta = helpful ? 
HELPFUL_DELTA : UNHELPFUL_DELTA; + const confidenceAfter = Math.max( + CONFIDENCE_FLOOR, + Math.min(CONFIDENCE_CEIL, memory.confidence + delta), + ); + const now = new Date(); + + await store.withTransaction(async (txn) => { + await txn.updateMemory(id, { confidence: confidenceAfter, updated_at: now }); + // Helpful feedback is a successful retrieval — reinforce access stats. + // Unhelpful feedback deliberately does NOT touch: a failed retrieval + // should not look like recent use to the consolidation pipeline. + if (helpful) { + await txn.touchMemory(id, {}); + } + await txn.put('feedback_log', { + memory_id: id, + memory_name: memory.name, + helpful, + note: note ?? null, + confidence_before: memory.confidence, + confidence_after: confidenceAfter, + timestamp: now.toISOString(), + }); + }); + + return { + memory_id: id, + name: memory.name, + helpful, + confidence_before: memory.confidence, + confidence_after: confidenceAfter, + }; + }, +}; diff --git a/src/tools/query.ts b/src/tools/query.ts index 06d64a0..e20e099 100644 --- a/src/tools/query.ts +++ b/src/tools/query.ts @@ -7,6 +7,7 @@ import type { CortexStore } from '../core/store.js'; import { hydeExpand, spreadActivation, + cosineSimilarity, } from '../engines/memory.js'; import { retrievability, elapsedDaysSince } from '../engines/fsrs.js'; import { str, optStr, optNum, optBool, fireTriggers, fireBridges } from './_helpers.js'; @@ -26,6 +27,7 @@ export const queryTool: ToolDefinition = { hyde: { type: 'boolean', description: 'Expand query for better conceptual matches (default: true)' }, min_score: { type: 'number', description: 'Minimum similarity score threshold (default: 0.3). Results below this are dropped.' 
}, category: { type: 'string', description: 'Filter results to a specific category (belief, pattern, entity, topic, value, project, insight, observation)' }, + lexical: { type: 'boolean', description: 'Merge full-text keyword matches into the candidate set for exact-term recall (default: true)' }, }, required: ['text'], }, @@ -36,6 +38,7 @@ export const queryTool: ToolDefinition = { const useHyde = optBool(args, 'hyde', true); const minScore = optNum(args, 'min_score', 0.3); const categoryFilter = optStr(args, 'category'); + const useLexical = optBool(args, 'lexical', true); const store: CortexStore = ctx.namespaces.getStore(namespace); @@ -51,6 +54,30 @@ export const queryTool: ToolDefinition = { const fetchLimit = Math.max(limit * 3, 15); const nearest = await store.findNearest(queryEmbedding, fetchLimit); + // Hybrid recall: merge lexical (FTS/BM25) hits the vector search missed — + // exact IDs, proper nouns, rare terms. Lexical-only candidates are + // re-scored by cosine so downstream ranking stays uniform; memories + // without a usable embedding keep their lexical score. + let lexicalAdded = 0; + if (useLexical) { + try { + const lexicalHits = await store.searchText(text, fetchLimit); + const seen = new Set(nearest.map((r) => r.memory.id)); + for (const hit of lexicalHits) { + if (seen.has(hit.memory.id)) continue; + const memory = await store.getMemory(hit.memory.id); + if (!memory) continue; + const score = memory.embedding.length === queryEmbedding.length + ? cosineSimilarity(queryEmbedding, memory.embedding) + : hit.score; + nearest.push({ memory: hit.memory, score, distance: 1 - score }); + lexicalAdded++; + } + } catch { + // Lexical recall is best-effort — semantic results still stand. 
+ } + } + // Spread activation for richer results — pass query embedding for query-conditioned BFS const activated = await spreadActivation(store, nearest, queryEmbedding); @@ -112,6 +139,8 @@ export const queryTool: ToolDefinition = { return { query: text, hyde_used: useHyde, + lexical_used: useLexical, + lexical_added: lexicalAdded, namespace: resolvedNs, count: filtered.length, results: filtered, From a40a18c74846bd0884055cf946d17797a2bcf20d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 04:22:55 +0000 Subject: [PATCH 2/5] feat(hermes): auto-consolidation and tiered context loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two of the three Hermes Agent patterns that were not yet in cortex-engine: ## Thing 2 — Automatic session-end memory extraction Hermes syncs conversation turns to memory after each response and extracts on session end. SessionConsolidator (engines/auto-consolidate.ts) replicates this loop: - observe / wonder / speculate call consolidator.notifyObservation() after every successful write. - When pending count crosses AUTO_THRESHOLD (10) per namespace, dreamPhaseA fires in the background without blocking the calling tool. Phase A only (NREM: cluster → refine → create) — lightweight enough to run per session; REM stays in the scheduled dream cron. - SIGTERM / SIGINT / beforeExit handlers flush all namespaces with unprocessed observations before the process dies. - Background errors are swallowed (best-effort); CORTEX_DEBUG=1 surfaces them to stderr. ## Thing 3 — Tiered context loading (L0 / L1 / L2) New `context` tool mirrors Hermes OpenViking's progressive context tiers: - L0 (~100 tokens): top-3 by salience × FSRS retrievability. One vector search, no LLM call. Designed for system-prompt injection on every turn. - L1 (~2k tokens): semantic top-15, full definitions, tags, immediate graph edges (one hop). Working-memory refresh mid-conversation. 
- L2 (full): multi-anchor retrieval (4 query reformulations, Borda count), spreading activation (2 hops), full metadata including provenance, FSRS state, activation path. Maximum recall for deep research tasks. All tiers support HyDE expansion (default on, disable with hyde: false). L0 always skips HyDE — it is the latency-zero path. https://claude.ai/code/session_01DAZ3GzRri9hqxkTyqmSpc4 --- src/engines/auto-consolidate.ts | 96 ++++++++++++++ src/mcp/server.ts | 10 +- src/mcp/tools.ts | 5 + src/tools/context.ts | 218 ++++++++++++++++++++++++++++++++ src/tools/observe.ts | 6 +- src/tools/speculate.ts | 1 + src/tools/wonder.ts | 1 + 7 files changed, 335 insertions(+), 2 deletions(-) create mode 100644 src/engines/auto-consolidate.ts create mode 100644 src/tools/context.ts diff --git a/src/engines/auto-consolidate.ts b/src/engines/auto-consolidate.ts new file mode 100644 index 0000000..375ccb6 --- /dev/null +++ b/src/engines/auto-consolidate.ts @@ -0,0 +1,96 @@ +/** + * SessionConsolidator — Hermes-inspired automatic memory extraction. + * + * Hermes Agent syncs conversation turns to memory after each response and + * extracts memories on session end. This module replicates that loop for + * cortex-engine: + * + * - observe / wonder / speculate call notifyObservation() after every write. + * - When pending count hits AUTO_THRESHOLD per namespace, dreamPhaseA + * (NREM: cluster → refine → create) fires in the background without + * blocking the tool call that triggered it. + * - On process exit (SIGTERM / SIGINT), flush() runs dreamPhaseA across + * all namespaces with unprocessed observations. + * + * dreamPhaseA is intentionally lightweight — no REM (edges, abstraction, + * FSRS scoring). Those still belong in the scheduled full `dream` cycle. + * The point is that raw observations do not sit unprocessed across session + * boundaries; they become searchable memories within the same session. 
/** Number of new observations per namespace that trigger an auto-consolidation. */
export const AUTO_THRESHOLD = 10;

/**
 * Counts observation writes per namespace and runs `dreamPhaseA` in the
 * background once a namespace has accumulated AUTO_THRESHOLD of them.
 *
 * Guarantees kept by this class:
 *  - at most one Phase A run per namespace is in flight at any time;
 *  - a failing run (including a store/config lookup that throws) never
 *    propagates into the tool call that reported the observation;
 *  - flush() resolves only after in-flight runs have settled and every
 *    namespace with pending observations has had one more run.
 */
export class SessionConsolidator {
  /** pending[namespace] = observations reported since the last run was started */
  private readonly pending = new Map<string, number>();
  /** inFlight[namespace] = the running background Phase A; these promises never reject */
  private readonly inFlight = new Map<string, Promise<void>>();
  private shuttingDown = false;

  constructor(
    private readonly namespaces: NamespaceManager,
    private readonly embed: EmbedProvider,
    private readonly llm: LLMProvider,
  ) {}

  /**
   * Call this after every successful observation write. When the pending
   * count reaches AUTO_THRESHOLD and no run is in flight for the namespace,
   * a background Phase A run is started. Never throws.
   */
  notifyObservation(namespace: string): void {
    const count = (this.pending.get(namespace) ?? 0) + 1;
    this.pending.set(namespace, count);
    if (count >= AUTO_THRESHOLD && !this.inFlight.has(namespace)) {
      void this.runPhaseA(namespace);
    }
  }

  /**
   * Flush all namespaces — called on process exit. Awaitable so the exit
   * handler can give it a chance to complete before the process dies.
   * Always resolves; individual run failures are swallowed by runPhaseA.
   */
  async flush(): Promise<void> {
    this.shuttingDown = true;
    // Include namespaces that only have an in-flight run: their counter was
    // reset when the run started, but the run itself still has to finish.
    const targets = new Set<string>([
      ...this.namespaces.getNamespaceNames(),
      ...this.inFlight.keys(),
    ]);
    await Promise.allSettled([...targets].map((ns) => this.drain(ns)));
  }

  /** Wait for the namespace's in-flight run (if any), then consolidate whatever is still pending. */
  private async drain(namespace: string): Promise<void> {
    // Awaiting first avoids two concurrent Phase A runs over the same store.
    await this.inFlight.get(namespace);
    if ((this.pending.get(namespace) ?? 0) > 0) {
      await this.runPhaseA(namespace, true);
    }
  }

  /**
   * Start one Phase A run for `namespace`.
   *
   * @param wait when true the returned promise settles with the run;
   *             otherwise an already-resolved promise is returned and the
   *             run continues in the background.
   * @returns a promise that never rejects.
   */
  private runPhaseA(namespace: string, wait = false): Promise<void> {
    // Reset up front so observations arriving mid-run count toward the next batch.
    this.pending.set(namespace, 0);

    let work: Promise<unknown>;
    try {
      const store = this.namespaces.getStore(namespace);
      const nsConfig = this.namespaces.getConfig(namespace);
      work = dreamPhaseA(store, this.embed, this.llm, {
        observation_limit: 50,
        similarity_merge: nsConfig.similarity_merge,
        similarity_link: nsConfig.similarity_link,
      });
    } catch (err: unknown) {
      // A synchronous failure is handled exactly like an asynchronous one.
      work = Promise.reject(err);
    }

    const guarded: Promise<void> = work
      .then(() => undefined)
      .catch((err: unknown) => {
        // Auto-consolidation is best-effort — never crash the serving process.
        if (process.env['CORTEX_DEBUG']) {
          process.stderr.write(`[auto-consolidate:${namespace}] ${String(err)}\n`);
        }
      })
      .finally(() => {
        this.inFlight.delete(namespace);
        // If more observations arrived while we were running, re-trigger.
        if (!this.shuttingDown && (this.pending.get(namespace) ?? 0) >= AUTO_THRESHOLD) {
          void this.runPhaseA(namespace);
        }
      });

    this.inFlight.set(namespace, guarded);
    return wait ? guarded : Promise.resolve();
  }
}
Filter active tools by namespace config + core set const activeToolNames = namespaces.getActiveTools(); for (const t of CORE_TOOLS) { diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 8735328..8f19f3e 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -13,6 +13,7 @@ import type { NamespaceManager } from '../namespace/manager.js'; import type { TriggerRegistry } from '../triggers/registry.js'; import type { BridgeRegistry } from '../bridges/registry.js'; import type { FederationClient } from '../federation/client.js'; +import type { SessionConsolidator } from '../engines/auto-consolidate.js'; // ─── Tool imports ──────────────────────────────────────────────────────────── @@ -84,6 +85,7 @@ import { contentUpdateTool } from '../tools/content-update.js'; // Vitals tools import { vitalsGetTool } from '../tools/vitals-get.js'; import { vitalsSetTool } from '../tools/vitals-set.js'; +import { contextTool } from '../tools/context.js'; // ─── Tool Context ───────────────────────────────────────────────────────────── @@ -99,6 +101,8 @@ export interface ToolContext { allTools: ToolDefinition[]; /** Federation client for multi-instance coordination (optional, only if configured). */ federation?: FederationClient; + /** Auto-consolidation engine — notified by observe/wonder/speculate after every write. */ + consolidator?: SessionConsolidator; } // ─── Tool Definition ────────────────────────────────────────────────────────── @@ -206,6 +210,7 @@ export interface ToolPlugin { export function createTools(): ToolDefinition[] { return [ // Core cognitive tools + contextTool, queryTool, feedbackTool, observeTool, diff --git a/src/tools/context.ts b/src/tools/context.ts new file mode 100644 index 0000000..0630d3a --- /dev/null +++ b/src/tools/context.ts @@ -0,0 +1,218 @@ +/** + * context — tiered memory context loader, inspired by Hermes Agent's + * OpenViking memory provider (L0 → L1 → L2 progressive loading). 
type Tier = 'L0' | 'L1' | 'L2';

/** The FSRS fields read when scoring, typed from the helpers that consume them. */
type FsrsSnapshot = {
  stability: Parameters<typeof retrievability>[0];
  last_review?: Parameters<typeof elapsedDaysSince>[0] | null;
};

/** Normalize the raw `tier` argument; anything other than 'L0' | 'L1' | 'L2' falls back to 'L1'. */
function parseTier(raw: unknown): Tier {
  if (raw === 'L0' || raw === 'L1' || raw === 'L2') return raw;
  return 'L1';
}

/** FSRS retrievability right now; a memory with no `last_review` is scored at zero elapsed days. */
function currentRetrievability(fsrs: FsrsSnapshot) {
  const daysSince = fsrs.last_review ? elapsedDaysSince(fsrs.last_review) : 0;
  return retrievability(fsrs.stability, daysSince);
}

/** Retrieval score damped by retrievability and by a salience factor in [0.5, 1] for salience in [0, 1]. */
function compositeScoreOf(score: number, ret: number, salience: number): number {
  return score * ret * (0.5 + salience * 0.5);
}

/**
 * `context` tool — tiered memory loader.
 *
 *  - L0: embeds the raw text (HyDE is never used), takes the 20 nearest
 *        memories, re-ranks by salience × retrievability and returns the
 *        top 3 as name + 80-character summary.
 *  - L1: nearest 15 memories (HyDE-expanded query unless `hyde: false`),
 *        ranked by composite score, each with up to 5 outgoing edges.
 *  - L2: multi-anchor retrieval followed by spreading activation, each
 *        result re-read from the store and returned with full metadata
 *        and up to 10 outgoing edges. Results whose memory can no longer
 *        be loaded are dropped.
 *
 * An unrecognized or missing `tier` is treated as L1.
 */
export const contextTool: ToolDefinition = {
  name: 'context',
  category: 'memory',
  description: 'Tiered memory loader: L0 (top-3 names, ~100 tokens, instant), L1 (semantic top-15 + graph edges, ~2k tokens), L2 (multi-anchor full recall, max richness). Use L0 for system-prompt injection, L1 for mid-conversation refresh, L2 for deep research.',
  whenToUse: 'You need to prefetch relevant memory before a response and want to control the token budget explicitly.',
  doNotUse: 'You want ranked search with HyDE + spread activation for a specific question — use query instead.',
  inputSchema: {
    type: 'object',
    properties: {
      text: { type: 'string', description: 'Topic or question to retrieve context for' },
      tier: { type: 'string', enum: ['L0', 'L1', 'L2'], description: 'L0 = fast summary (~100 tokens), L1 = working memory (~2k tokens), L2 = full deep recall (default: L1)' },
      namespace: { type: 'string', description: 'Memory namespace (defaults to default)' },
      hyde: { type: 'boolean', description: 'Use HyDE query expansion (default: true; ignored for L0)' },
    },
    required: ['text'],
  },

  async handler(args, ctx) {
    const text = str(args, 'text');
    const tier = parseTier(args['tier']);
    const namespace = optStr(args, 'namespace');
    const useHyde = optBool(args, 'hyde', true);

    const store = ctx.namespaces.getStore(namespace);
    const resolvedNs = namespace ?? ctx.namespaces.getDefaultNamespace();

    // ── L0: salience × retrievability top-3 — no LLM call ──────────────────
    if (tier === 'L0') {
      const rawEmbedding = await ctx.embed.embed(text);
      // Over-fetch so the re-ranking below has something to choose from.
      const candidates = await store.findNearest(rawEmbedding, 20);

      const top = candidates
        .map((r) => ({ r, score: r.memory.salience * currentRetrievability(r.memory.fsrs) }))
        .sort((a, b) => b.score - a.score)
        .slice(0, 3);

      return {
        tier: 'L0',
        namespace: resolvedNs,
        count: top.length,
        memories: top.map(({ r }) => ({
          id: r.memory.id,
          name: r.memory.name,
          summary: r.memory.definition.slice(0, 80) + (r.memory.definition.length > 80 ? '…' : ''),
          category: r.memory.category,
          salience: r.memory.salience,
        })),
      };
    }

    // ── L1: semantic top-15 + immediate graph edges ─────────────────────────
    if (tier === 'L1') {
      const embedding = useHyde
        ? await hydeExpand(text, ctx.llm, ctx.embed)
        : await ctx.embed.embed(text);

      const nearest = await store.findNearest(embedding, 15);

      const results = await Promise.all(
        nearest.map(async (r) => {
          const ret = currentRetrievability(r.memory.fsrs);
          const compositeScore = compositeScoreOf(r.score, ret, r.memory.salience);

          const edges = await store.getEdgesFrom(r.memory.id);
          const links = edges.slice(0, 5).map((e) => ({
            target_id: e.target_id,
            relation: e.relation,
            weight: e.weight,
          }));

          return { r, compositeScore, ret, links };
        }),
      );

      const sorted = results.sort((a, b) => b.compositeScore - a.compositeScore);

      return {
        tier: 'L1',
        namespace: resolvedNs,
        hyde_used: useHyde,
        count: sorted.length,
        memories: sorted.map(({ r, compositeScore, ret, links }) => ({
          id: r.memory.id,
          name: r.memory.name,
          definition: r.memory.definition,
          category: r.memory.category,
          tags: r.memory.tags,
          salience: r.memory.salience,
          confidence: r.memory.confidence,
          score: r.score,
          composite_score: compositeScore,
          retrievability: ret,
          links,
        })),
      };
    }

    // ── L2: multi-anchor retrieval + spread activation ──────────────────────
    const candidates = await multiAnchorRetrieval(store, ctx.embed, ctx.llm, text, 10);
    const embedding = useHyde
      ? await hydeExpand(text, ctx.llm, ctx.embed)
      : await ctx.embed.embed(text);
    const activated = await spreadActivation(store, candidates, embedding, 2);

    const results = await Promise.all(
      activated.map(async (r) => {
        // Re-read the memory; skip results whose memory is no longer in the store.
        const memory = await store.getMemory(r.memory.id);
        if (!memory) return null;

        const ret = currentRetrievability(memory.fsrs);
        const compositeScore = compositeScoreOf(r.score, ret, memory.salience);

        const edges = await store.getEdgesFrom(memory.id);
        return {
          compositeScore,
          data: {
            id: memory.id,
            name: memory.name,
            definition: memory.definition,
            category: memory.category,
            tags: memory.tags,
            salience: memory.salience,
            confidence: memory.confidence,
            access_count: memory.access_count,
            score: r.score,
            composite_score: compositeScore,
            retrievability: ret,
            hop_count: r.hop_count,
            activation_path: r.activation_path,
            memory_origin: memory.memory_origin,
            provenance: memory.provenance,
            fsrs_state: memory.fsrs.state,
            fsrs_stability: memory.fsrs.stability,
            last_accessed: memory.last_accessed.toISOString(),
            updated_at: memory.updated_at.toISOString(),
            links: edges.slice(0, 10).map((e) => ({
              target_id: e.target_id,
              relation: e.relation,
              weight: e.weight,
            })),
          },
        };
      }),
    );

    const filtered = results
      .filter((r): r is NonNullable<typeof r> => r !== null)
      .sort((a, b) => b.compositeScore - a.compositeScore);

    return {
      tier: 'L2',
      namespace: resolvedNs,
      hyde_used: useHyde,
      count: filtered.length,
      memories: filtered.map((r) => r.data),
    };
  },
};
b/src/tools/observe.ts index e84a101..b79d682 100644 --- a/src/tools/observe.ts +++ b/src/tools/observe.ts @@ -191,8 +191,12 @@ export const observeTool: ToolDefinition = { message: `Observation stored (similarity: ${gate.max_similarity.toFixed(2)}) — will consolidate during next dream`, }; - // Fire triggers and bridges after observe + // Notify the auto-consolidator so it can fire Phase A in the background + // when enough observations have accumulated (threshold = AUTO_THRESHOLD). const resolvedNs = namespace ?? ctx.namespaces.getDefaultNamespace(); + ctx.consolidator?.notifyObservation(resolvedNs); + + // Fire triggers and bridges after observe await fireTriggers(ctx, resolvedNs, 'observe', text, { observation_id: id, decision: gate.decision }, ctx.allTools); await fireBridges(ctx, resolvedNs, 'observe', result, ctx.allTools); diff --git a/src/tools/speculate.ts b/src/tools/speculate.ts index 167d5d9..99cb30d 100644 --- a/src/tools/speculate.ts +++ b/src/tools/speculate.ts @@ -49,6 +49,7 @@ export const speculateTool: ToolDefinition = { }); const resolvedNs = namespace ?? ctx.namespaces.getDefaultNamespace(); + ctx.consolidator?.notifyObservation(resolvedNs); await fireTriggers(ctx, resolvedNs, 'speculate', text, { observation_id: id }, ctx.allTools); await fireBridges(ctx, resolvedNs, 'speculate', { id, namespace: resolvedNs }, ctx.allTools); diff --git a/src/tools/wonder.ts b/src/tools/wonder.ts index 1f77316..523c86c 100644 --- a/src/tools/wonder.ts +++ b/src/tools/wonder.ts @@ -49,6 +49,7 @@ export const wonderTool: ToolDefinition = { }); const resolvedNs = namespace ?? 
ctx.namespaces.getDefaultNamespace(); + ctx.consolidator?.notifyObservation(resolvedNs); await fireTriggers(ctx, resolvedNs, 'wonder', text, { observation_id: id }, ctx.allTools); await fireBridges(ctx, resolvedNs, 'wonder', { id, namespace: resolvedNs }, ctx.allTools); From d10e5ee9ffbbc55a4f7b679bc4acc37abf29a74b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 20:33:23 +0000 Subject: [PATCH 3/5] test(hermes): cover searchText, feedback, auto-consolidation + audit doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 27 new tests: - search-text.test.ts — FTS5 keyword search (name/definition/tags), faded exclusion, MATCH-syntax injection safety, trigger sync across updateMemory and upsertMemory (the INSERT OR REPLACE path that needs recursive_triggers), FTS rebuild on reopened DBs, JSON lexical fallback ranking, and round-trip persistence of last_retrieval_score / last_hop_count / memory_origin. - feedback.test.ts — asymmetric deltas (+0.05/-0.10), floor/ceiling clamping, access reinforcement only on helpful, feedback_log contents, unknown-id and missing-arg errors. Runs against real in-memory SQLite so the withTransaction path is exercised. - auto-consolidate.test.ts — threshold triggering (exactly at 10, not below), counter reset, per-namespace isolation, flush() draining, and error swallowing. docs/hermes-audit.md records the audit findings (severity + fix), the three Hermes patterns borrowed, and the gaps deliberately left open (embedding blob storage, ANN scaling, generic-collection scans). 
https://claude.ai/code/session_01DAZ3GzRri9hqxkTyqmSpc4 --- docs/hermes-audit.md | 78 ++++++++++ src/engines/auto-consolidate.test.ts | 127 ++++++++++++++++ src/stores/search-text.test.ts | 214 +++++++++++++++++++++++++++ src/tools/feedback.test.ts | 124 ++++++++++++++++ 4 files changed, 543 insertions(+) create mode 100644 docs/hermes-audit.md create mode 100644 src/engines/auto-consolidate.test.ts create mode 100644 src/stores/search-text.test.ts create mode 100644 src/tools/feedback.test.ts diff --git a/docs/hermes-audit.md b/docs/hermes-audit.md new file mode 100644 index 0000000..e251b72 --- /dev/null +++ b/docs/hermes-audit.md @@ -0,0 +1,78 @@ +# Systems Audit & Hermes Agent Research — June 2026 + +Audit of cortex-engine's storage and retrieval systems, cross-referenced +against [Hermes Agent](https://github.com/nousresearch/hermes-agent) +(Nous Research, MIT) — a comparable self-improving agent whose memory +subsystem solved several problems we had open. + +## Audit findings (fixed) + +| # | Finding | Severity | Fix | +|---|---------|----------|-----| +| 1 | `last_retrieval_score`, `last_hop_count`, `memory_origin` were silently dropped by the SQLite and Firestore backends. The dream pipeline's FSRS rating (`engines/cognition.ts`, score phase) reads these fields to boost/penalize review ratings — that feedback loop **never fired** on either production backend. | High | Persisted as real columns/fields in both backends, with `ALTER TABLE` migration shims for existing SQLite DBs. | +| 2 | Zero secondary indexes in the SQLite schema. Edge traversal (`getEdgesFrom`, `getEdgesForMemories`), unprocessed-observation fetches, recency queries, and belief history were all full table scans. | Medium | Six indexes added: `edges(source_id)`, `edges(target_id)`, `observations(processed, created_at)`, `memories(updated_at)`, `ops(created_at)`, `beliefs(concept_id)`. | +| 3 | Retrieval was embedding-only. 
Exact identifiers, proper nouns, and rare terms that embed poorly were unfindable even when stored verbatim. | Medium | FTS5 + hybrid recall (see below). | +| 4 | No mechanism for an agent to report that a retrieved memory was wrong. Bad memories stayed highly ranked until a dream-cycle hindsight review happened to catch them. | Medium | `feedback` tool (see below). | +| 5 | Observations recorded via `observe`/`wonder`/`speculate` sat unprocessed until someone ran `dream` manually or via cron. Sessions that ended before a dream cycle left knowledge stranded. | Medium | Auto-consolidation (see below). | + +## Patterns borrowed from Hermes Agent + +### 1. Holographic memory — FTS5 + asymmetric trust scoring + +Hermes' Holographic provider pairs SQLite FTS5 full-text search with trust +scoring (+0.05 helpful / −0.10 unhelpful). + +- **`searchText()`** on `CortexStore`: FTS5/BM25 on SQLite (external-content + table, trigger-synced, `recursive_triggers=ON` so upserts stay in sync); + weighted token-overlap fallback on JSON/Firestore. +- **Hybrid recall in `query`**: lexical hits are merged into the vector + candidate set and re-scored by cosine, so ranking semantics stay uniform. + Disable with `lexical: false`. +- **`feedback` tool**: asymmetric confidence adjustment. The asymmetry is + the point — one bad retrieval costs twice what one good retrieval earns, + so polluted memories decay out of top ranks quickly. Events log to + `feedback_log` for correlation with `retrieval_audit` traces. + +### 2. Automatic memory extraction (session sync) + +Hermes syncs turns to memory after each response and extracts on session +end. `SessionConsolidator` (`engines/auto-consolidate.ts`): + +- `observe`/`wonder`/`speculate` notify it after every write. +- At 10 pending observations per namespace, `dreamPhaseA` (NREM only: + cluster → refine → create) runs in the background — non-blocking, + best-effort, re-triggers if more arrive mid-run. 
+- `SIGTERM`/`SIGINT`/`beforeExit` flush all pending namespaces. +- REM phases (edges, abstraction, FSRS scoring, hindsight) intentionally + stay in the scheduled `dream` cycle — they are LLM-heavy. + +### 3. Tiered context loading (L0 → L1 → L2) + +Hermes' OpenViking provider loads context progressively (~100 tokens → +~2k → full). The `context` tool mirrors this: + +- **L0** (~100 tokens): top-3 by salience × FSRS retrievability, names + + 80-char snippets. One vector search, no LLM call. For per-turn + system-prompt injection. +- **L1** (~2k tokens): semantic top-15 with definitions, tags, one-hop + edges. Mid-conversation working-memory refresh. +- **L2** (full): multi-anchor retrieval (Borda count over 4 query + reformulations) + 2-hop spreading activation + full metadata + (provenance, FSRS state, activation paths). Deep research. + +## Known gaps (deliberately not addressed) + +- **Embedding storage format**: memories store embeddings as JSON text + (~4× larger and slower to parse than `Float32Array` blobs; the read path + already supports blobs). Not switched because float32 rounding changes + exact-equality semantics relied on by migration round-trip tests. + Worth doing with a planned migration. +- **Brute-force ANN**: `findNearest` on SQLite scans every row. Documented + as fine below 10k memories; beyond that, consider `sqlite-vec` or an HNSW + sidecar. +- **Generic-collection queries**: `query()` on SQLite loads the entire + collection and filters in JS. Acceptable for current collection sizes + (threads, journal, vitals); revisit if any collection grows unbounded — + `feedback_log` is the most likely candidate. +- **Firestore `searchText`** falls back to a full-collection scan. Swap in + an external search index if cloud deployments grow. 
/**
 * Store stub exposing only the methods these tests configure. The
 * `getUnprocessedObservations` spy doubles as the "Phase A ran" signal.
 */
function makeMockStore(): CortexStore {
  return {
    // Phase A entry point — empty result short-circuits cluster/refine/create
    // so no embed/llm calls happen. The call itself is the trigger signal.
    getUnprocessedObservations: vi.fn(() => Promise.resolve([])),
    getEdgesForMemories: vi.fn(() => Promise.resolve([])),
    findNearest: vi.fn(() => Promise.resolve([])),
    getAllMemories: vi.fn(() => Promise.resolve([])),
  } as unknown as CortexStore;
}

/** NamespaceManager stub backed by a name → store record; every namespace shares one config. */
function makeManager(stores: Record<string, CortexStore>): NamespaceManager {
  return {
    getStore: vi.fn((ns?: string) => stores[ns ?? 'default']),
    getConfig: vi.fn(() => ({
      description: 'test',
      cognitive_tools: [],
      collections_prefix: '',
      similarity_merge: 0.85,
      similarity_link: 0.5,
    })),
    getNamespaceNames: vi.fn(() => Object.keys(stores)),
    getDefaultNamespace: vi.fn(() => 'default'),
  } as unknown as NamespaceManager;
}

const embed = { embed: vi.fn(() => Promise.resolve([1, 0, 0])) } as EmbedProvider;
const llm = {
  generate: vi.fn(() => Promise.resolve('')),
  generateJSON: vi.fn(() => Promise.resolve({})),
} as unknown as LLMProvider;

/** Yield one macrotask so fire-and-forget background runs get a chance to progress. */
async function settle(): Promise<void> {
  await new Promise<void>((resolve) => setTimeout(resolve, 0));
}

describe('SessionConsolidator', () => {
  it('does not trigger below the threshold', async () => {
    const store = makeMockStore();
    const consolidator = new SessionConsolidator(makeManager({ default: store }), embed, llm);

    for (let i = 0; i < AUTO_THRESHOLD - 1; i++) {
      consolidator.notifyObservation('default');
    }
    await settle();

    expect(store.getUnprocessedObservations).not.toHaveBeenCalled();
  });

  it('triggers Phase A exactly at the threshold', async () => {
    const store = makeMockStore();
    const consolidator = new SessionConsolidator(makeManager({ default: store }), embed, llm);

    for (let i = 0; i < AUTO_THRESHOLD; i++) {
      consolidator.notifyObservation('default');
    }
    await settle();

    expect(store.getUnprocessedObservations).toHaveBeenCalledTimes(1);
  });

  it('resets the counter after triggering — next trigger needs a full batch', async () => {
    const store = makeMockStore();
    const consolidator = new SessionConsolidator(makeManager({ default: store }), embed, llm);

    for (let i = 0; i < AUTO_THRESHOLD; i++) consolidator.notifyObservation('default');
    await settle();
    // A few more, below threshold — must not re-trigger
    for (let i = 0; i < 3; i++) consolidator.notifyObservation('default');
    await settle();

    expect(store.getUnprocessedObservations).toHaveBeenCalledTimes(1);
  });

  it('tracks namespaces independently', async () => {
    const storeA = makeMockStore();
    const storeB = makeMockStore();
    const consolidator = new SessionConsolidator(
      makeManager({ a: storeA, b: storeB }), embed, llm,
    );

    for (let i = 0; i < AUTO_THRESHOLD; i++) consolidator.notifyObservation('a');
    consolidator.notifyObservation('b');
    await settle();

    expect(storeA.getUnprocessedObservations).toHaveBeenCalledTimes(1);
    expect(storeB.getUnprocessedObservations).not.toHaveBeenCalled();
  });

  it('flush() drains namespaces with pending observations', async () => {
    const storeA = makeMockStore();
    const storeB = makeMockStore();
    const consolidator = new SessionConsolidator(
      makeManager({ a: storeA, b: storeB }), embed, llm,
    );

    consolidator.notifyObservation('a'); // below threshold — pending
    await consolidator.flush();

    expect(storeA.getUnprocessedObservations).toHaveBeenCalledTimes(1);
    expect(storeB.getUnprocessedObservations).not.toHaveBeenCalled();
  });

  it('survives store errors without throwing', async () => {
    const store = {
      getUnprocessedObservations: vi.fn(() => Promise.reject(new Error('boom'))),
      getEdgesForMemories: vi.fn(() => Promise.resolve([])),
      findNearest: vi.fn(() => Promise.resolve([])),
      getAllMemories: vi.fn(() => Promise.resolve([])),
    } as unknown as CortexStore;
    const consolidator = new SessionConsolidator(makeManager({ default: store }), embed, llm);

    // Background path: the threshold-triggered run rejects and must be swallowed.
    for (let i = 0; i < AUTO_THRESHOLD; i++) consolidator.notifyObservation('default');
    await settle();
    expect(store.getUnprocessedObservations).toHaveBeenCalled();

    // Awaited path: the threshold run reset the counter, so leave one
    // observation pending — otherwise flush() has nothing to run and the
    // assertion below would pass without exercising a failing run.
    consolidator.notifyObservation('default');
    await expect(consolidator.flush()).resolves.toBeUndefined();
  });
});
/** Baseline memory record (no id) for the store under test; `overrides` replaces individual fields. */
function makeMemory(overrides: Partial<Memory> = {}): Omit<Memory, 'id'> {
  const timestamp = new Date();
  const base = {
    name: 'Test memory',
    definition: 'A memory about nothing in particular',
    category: 'topic',
    salience: 0.5,
    confidence: 0.5,
    access_count: 0,
    created_at: timestamp,
    updated_at: timestamp,
    last_accessed: timestamp,
    source_files: [],
    embedding: [0.1, 0.2, 0.3],
    tags: [],
    fsrs: { stability: 1, difficulty: 5, reps: 0, lapses: 0, state: 'new', last_review: null },
  };
  return { ...base, ...overrides };
}

/** Fresh in-memory SQLite store, one per test. */
function openInMemory(): SqliteCortexStore {
  return new SqliteCortexStore(':memory:');
}

/** Run `body` with a fresh temp directory that is removed afterwards, pass or fail. */
async function withTempDir(prefix: string, body: (dir: string) => Promise<void>): Promise<void> {
  const dir = mkdtempSync(join(tmpdir(), prefix));
  try {
    await body(dir);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

describe('SqliteCortexStore.searchText (FTS5)', () => {
  it('finds memories by keyword in name', async () => {
    const db = openInMemory();
    await db.putMemory(makeMemory({ name: 'Quantum entanglement basics' }));
    await db.putMemory(makeMemory({ name: 'Sourdough starter care' }));

    const hits = await db.searchText('quantum', 5);

    expect(hits).toHaveLength(1);
    expect(hits[0].memory.name).toBe('Quantum entanglement basics');
    expect(hits[0].score).toBeGreaterThan(0);
  });

  it('finds memories by keyword in definition and tags', async () => {
    const db = openInMemory();
    await db.putMemory(makeMemory({
      name: 'Auth system',
      definition: 'The service uses JWT tokens with RS256 signing',
    }));
    await db.putMemory(makeMemory({
      name: 'Deploy pipeline',
      definition: 'CI runs on push',
      tags: ['kubernetes', 'helm'],
    }));

    const byDefinition = await db.searchText('JWT', 5);
    expect(byDefinition[0].memory.name).toBe('Auth system');

    const byTag = await db.searchText('kubernetes', 5);
    expect(byTag[0].memory.name).toBe('Deploy pipeline');
  });

  it('excludes faded memories', async () => {
    const db = openInMemory();
    await db.putMemory(makeMemory({ name: 'Visible quantum memory' }));
    await db.putMemory(makeMemory({ name: 'Faded quantum memory', faded: true }));

    const hits = await db.searchText('quantum', 5);

    expect(hits).toHaveLength(1);
    expect(hits[0].memory.name).toBe('Visible quantum memory');
  });

  it('returns [] for empty or unmatched queries', async () => {
    const db = openInMemory();
    await db.putMemory(makeMemory({ name: 'Something' }));

    // Empty text, punctuation only, and a term that matches nothing.
    for (const query of ['', '!!! ???', 'zzzznonexistent']) {
      expect(await db.searchText(query, 5)).toEqual([]);
    }
  });

  it('does not break on MATCH syntax characters in the query', async () => {
    const db = openInMemory();
    await db.putMemory(makeMemory({ name: 'Quote handling' }));

    // None of these may throw — tokens are quoted before reaching MATCH.
    for (const query of ['"unbalanced', 'a AND NOT (b', 'col:value*']) {
      await expect(db.searchText(query, 5)).resolves.toBeDefined();
    }
  });

  it('stays in sync after updateMemory', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory({ name: 'Old topic name' }));

    await db.updateMemory(memoryId, { name: 'Fresh xylophone research' });

    const renamedHits = await db.searchText('xylophone', 5);
    expect(renamedHits).toHaveLength(1);
    const staleHits = await db.searchText('old topic', 5);
    expect(staleHits).toHaveLength(0);
  });

  it('stays in sync after upsertMemory (INSERT OR REPLACE path)', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory({ name: 'Original zebra entry' }));
    const stored = await db.getMemory(memoryId);

    await db.upsertMemory({ ...stored!, name: 'Replaced walrus entry' });

    const replacedHits = await db.searchText('walrus', 5);
    expect(replacedHits).toHaveLength(1);
    const staleHits = await db.searchText('zebra', 5);
    expect(staleHits).toHaveLength(0);
  });

  it('rebuilds the FTS index for pre-existing databases', async () => {
    await withTempDir('cortex-fts-', async (dir) => {
      const dbPath = join(dir, 'test.db');
      // The first instance creates the FTS table and seeds a row; the second
      // reopens the same file. The rebuild itself only fires when the FTS
      // table is first created (instance 1 here), so what this checks is
      // that reopening neither duplicates nor loses rows.
      const writer = new SqliteCortexStore(dbPath);
      await writer.putMemory(makeMemory({ name: 'Persistent giraffe fact' }));

      const reader = new SqliteCortexStore(dbPath);
      const hits = await reader.searchText('giraffe', 5);
      expect(hits).toHaveLength(1);
    });
  });
});

describe('JsonCortexStore.searchText (lexical fallback)', () => {
  it('finds matches and ranks name hits above definition hits', async () => {
    await withTempDir('cortex-json-', async (dir) => {
      const jsonStore = new JsonCortexStore(join(dir, 'test.json'));
      await jsonStore.putMemory(makeMemory({
        name: 'Banana cultivation',
        definition: 'Growing tropical fruit',
      }));
      await jsonStore.putMemory(makeMemory({
        name: 'Grocery list',
        definition: 'Need to buy banana and milk',
      }));
      await jsonStore.putMemory(makeMemory({ name: 'Unrelated', definition: 'Nothing here' }));

      const hits = await jsonStore.searchText('banana', 5);

      expect(hits).toHaveLength(2);
      expect(hits[0].memory.name).toBe('Banana cultivation');
    });
  });

  it('excludes faded memories', async () => {
    await withTempDir('cortex-json-', async (dir) => {
      const jsonStore = new JsonCortexStore(join(dir, 'test.json'));
      await jsonStore.putMemory(makeMemory({ name: 'Faded falcon', faded: true }));

      expect(await jsonStore.searchText('falcon', 5)).toEqual([]);
    });
  });
});

describe('retrieval-feedback field persistence (SQLite)', () => {
  it('persists memory_origin through putMemory', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory({ memory_origin: 'dream' }));

    const stored = await db.getMemory(memoryId);
    expect(stored!.memory_origin).toBe('dream');
  });

  it('persists last_retrieval_score and last_hop_count through updateMemory', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory());

    await db.updateMemory(memoryId, { last_retrieval_score: 0.93, last_hop_count: 1 });

    const stored = await db.getMemory(memoryId);
    expect(stored!.last_retrieval_score).toBeCloseTo(0.93);
    expect(stored!.last_hop_count).toBe(1);
  });

  it('returns undefined for fields never set', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory());

    const stored = await db.getMemory(memoryId);
    expect(stored!.last_retrieval_score).toBeUndefined();
    expect(stored!.last_hop_count).toBeUndefined();
    expect(stored!.memory_origin).toBeUndefined();
  });

  it('survives an upsert round-trip', async () => {
    const db = openInMemory();
    const memoryId = await db.putMemory(makeMemory({ memory_origin: 'abstract' }));
    await db.updateMemory(memoryId, { last_retrieval_score: 0.8, last_hop_count: 2 });

    // Write the record back unchanged; the feedback fields must come through intact.
    const beforeUpsert = await db.getMemory(memoryId);
    await db.upsertMemory(beforeUpsert!);

    const afterUpsert = await db.getMemory(memoryId);
    expect(afterUpsert!.memory_origin).toBe('abstract');
    expect(afterUpsert!.last_retrieval_score).toBeCloseTo(0.8);
    expect(afterUpsert!.last_hop_count).toBe(2);
  });
});
Tests for the feedback tool — asymmetric trust scoring against a real + * in-memory SQLite store (transactions + feedback_log included). + */ + +import { describe, it, expect, vi } from 'vitest'; +import { feedbackTool } from './feedback.js'; +import { SqliteCortexStore } from '../stores/sqlite.js'; +import type { ToolContext } from '../mcp/tools.js'; +import type { Memory } from '../core/types.js'; + +function makeMemory(confidence: number): Omit { + const now = new Date(); + return { + name: 'Test memory', + definition: 'A fact under evaluation', + category: 'topic', + salience: 0.5, + confidence, + access_count: 0, + created_at: now, + updated_at: now, + last_accessed: now, + source_files: [], + embedding: [0.1, 0.2, 0.3], + tags: [], + fsrs: { stability: 1, difficulty: 5, reps: 0, lapses: 0, state: 'new', last_review: null }, + }; +} + +function makeContext(store: SqliteCortexStore): ToolContext { + return { + namespaces: { + getStore: vi.fn(() => store), + getDefaultNamespace: vi.fn(() => 'default'), + }, + embed: {}, + llm: {}, + session: {}, + triggers: {}, + bridges: {}, + allTools: [], + } as unknown as ToolContext; +} + +describe('feedbackTool', () => { + it('helpful: +0.05 confidence and access reinforced', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.5)); + + const result = await feedbackTool.handler({ id, helpful: true }, makeContext(store)); + + expect(result).toMatchObject({ helpful: true, confidence_before: 0.5 }); + const memory = await store.getMemory(id); + expect(memory!.confidence).toBeCloseTo(0.55); + expect(memory!.access_count).toBe(1); + }); + + it('unhelpful: -0.10 confidence and access NOT reinforced', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.5)); + + await feedbackTool.handler({ id, helpful: false }, makeContext(store)); + + const memory = await store.getMemory(id); + 
expect(memory!.confidence).toBeCloseTo(0.4); + expect(memory!.access_count).toBe(0); + }); + + it('clamps confidence at the floor (0.05)', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.1)); + + await feedbackTool.handler({ id, helpful: false }, makeContext(store)); + + expect((await store.getMemory(id))!.confidence).toBeCloseTo(0.05); + }); + + it('clamps confidence at the ceiling (1.0)', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.98)); + + await feedbackTool.handler({ id, helpful: true }, makeContext(store)); + + expect((await store.getMemory(id))!.confidence).toBeCloseTo(1.0); + }); + + it('writes a feedback_log entry with before/after values', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.5)); + + await feedbackTool.handler({ id, helpful: false, note: 'stale info' }, makeContext(store)); + + const log = await store.query('feedback_log', []); + expect(log).toHaveLength(1); + expect(log[0]).toMatchObject({ + memory_id: id, + helpful: false, + note: 'stale info', + confidence_before: 0.5, + }); + expect(log[0]['confidence_after'] as number).toBeCloseTo(0.4); + }); + + it('returns an error for unknown memory ids', async () => { + const store = new SqliteCortexStore(':memory:'); + + const result = await feedbackTool.handler( + { id: 'no-such-id', helpful: true }, + makeContext(store), + ); + + expect(result).toMatchObject({ error: 'Memory not found: no-such-id' }); + }); + + it('rejects a missing helpful flag', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory(0.5)); + + await expect( + feedbackTool.handler({ id }, makeContext(store)), + ).rejects.toThrow('Missing required boolean argument: helpful'); + }); +}); From abaff9f7166d99f31f77383841408cb1f1f36a4b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 
10 Jun 2026 21:04:07 +0000 Subject: [PATCH 4/5] Store SQLite embeddings as Float32Array blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Embeddings were stored as JSON text (~4x larger, parsed on every read) even though the read path already understood Float32Array blobs. All write paths (putMemory, updateMemory, upsertMemory, putObservation, upsertObservation) now encode blobs, and legacy JSON-text rows are converted in place at store-open time — idempotent, only text-typed rows are touched. Because float32 truncation changes embedding values vs the float64 kept by the JSON backend, verifyMigration now compares embeddings at float32 precision (Math.fround) so json->sqlite migrations verify clean. https://claude.ai/code/session_01DAZ3GzRri9hqxkTyqmSpc4 --- docs/hermes-audit.md | 14 ++-- src/bin/migrate-cmd.ts | 6 +- src/stores/sqlite.test.ts | 160 ++++++++++++++++++++++++++++++++++++++ src/stores/sqlite.ts | 52 ++++++++++--- 4 files changed, 217 insertions(+), 15 deletions(-) diff --git a/docs/hermes-audit.md b/docs/hermes-audit.md index e251b72..81468f9 100644 --- a/docs/hermes-audit.md +++ b/docs/hermes-audit.md @@ -60,13 +60,17 @@ Hermes' OpenViking provider loads context progressively (~100 tokens → reformulations) + 2-hop spreading activation + full metadata (provenance, FSRS state, activation paths). Deep research. +## Follow-up work (done) + +- **Embedding storage format** (June 2026): SQLite now stores embeddings as + raw `Float32Array` blobs (~4× smaller, parse-free reads). Legacy JSON-text + rows are converted in place when the store is opened — idempotent, only + text-typed rows are touched. Embeddings are float32-truncated on write, so + cross-backend comparisons (e.g. `verifyMigration` for json→sqlite) compare + at float32 precision via `Math.fround`. 
+ ## Known gaps (deliberately not addressed) -- **Embedding storage format**: memories store embeddings as JSON text - (~4× larger and slower to parse than `Float32Array` blobs; the read path - already supports blobs). Not switched because float32 rounding changes - exact-equality semantics relied on by migration round-trip tests. - Worth doing with a planned migration. - **Brute-force ANN**: `findNearest` on SQLite scans every row. Documented as fine below 10k memories; beyond that, consider `sqlite-vec` or an HNSW sidecar. diff --git a/src/bin/migrate-cmd.ts b/src/bin/migrate-cmd.ts index 2a43cad..4c741f5 100644 --- a/src/bin/migrate-cmd.ts +++ b/src/bin/migrate-cmd.ts @@ -737,8 +737,12 @@ function deepEqualJson(a: unknown, b: unknown): boolean { } function jsonNormalize(value: unknown): string { - return JSON.stringify(value, (_key, v) => { + return JSON.stringify(value, (key, v) => { if (v instanceof Date) return v.toISOString(); + // SQLite stores embeddings as float32 blobs while JSON keeps full + // float64, so compare embeddings at float32 precision — otherwise a + // json→sqlite migration reports value diffs on every sampled memory. 
+ if (key === 'embedding' && Array.isArray(v)) return v.map(Math.fround); return v; }, 0); } diff --git a/src/stores/sqlite.test.ts b/src/stores/sqlite.test.ts index 8d64891..3c1c6de 100644 --- a/src/stores/sqlite.test.ts +++ b/src/stores/sqlite.test.ts @@ -4,7 +4,11 @@ import { describe, it, expect } from 'vitest'; import Database from 'better-sqlite3'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; import { SqliteCortexStore } from './sqlite.js'; +import type { Memory } from '../core/types.js'; interface StoreInternals { db: Database.Database; @@ -59,3 +63,159 @@ describe('SqliteCortexStore.findNearest', () => { expect(results.map(r => r.memory.id)).not.toContain('mem-empty'); }); }); + +function makeMemory(overrides: Partial = {}): Omit { + const now = new Date(); + return { + name: 'Test memory', + definition: 'A memory about nothing in particular', + category: 'topic', + salience: 0.5, + confidence: 0.5, + access_count: 0, + created_at: now, + updated_at: now, + last_accessed: now, + source_files: [], + embedding: [0.1, 0.2, 0.3], + tags: [], + fsrs: { stability: 1, difficulty: 5, reps: 0, lapses: 0, state: 'new', last_review: null }, + ...overrides, + }; +} + +describe('embedding blob storage', () => { + it('writes embeddings as BLOBs and reads them back at float32 precision', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory({ embedding: [0.1, 0.2, 0.3] })); + + const stored = getDb(store) + .prepare(`SELECT typeof(embedding) AS t FROM memories WHERE id = ?`) + .get(id) as { t: string }; + expect(stored.t).toBe('blob'); + + const memory = await store.getMemory(id); + expect(memory!.embedding).toHaveLength(3); + expect(memory!.embedding[0]).toBeCloseTo(0.1, 6); + expect(memory!.embedding[1]).toBeCloseTo(0.2, 6); + expect(memory!.embedding[2]).toBeCloseTo(0.3, 6); + }); + + it('updateMemory and upsertMemory also write BLOBs', 
async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory()); + + await store.updateMemory(id, { embedding: [0.4, 0.5] }); + let row = getDb(store) + .prepare(`SELECT typeof(embedding) AS t FROM memories WHERE id = ?`) + .get(id) as { t: string }; + expect(row.t).toBe('blob'); + expect((await store.getMemory(id))!.embedding[0]).toBeCloseTo(0.4, 6); + + await store.upsertMemory({ ...(await store.getMemory(id))!, embedding: [0.6] }); + row = getDb(store) + .prepare(`SELECT typeof(embedding) AS t FROM memories WHERE id = ?`) + .get(id) as { t: string }; + expect(row.t).toBe('blob'); + expect((await store.getMemory(id))!.embedding[0]).toBeCloseTo(0.6, 6); + }); + + it('handles empty embeddings as zero-length BLOBs', async () => { + const store = new SqliteCortexStore(':memory:'); + const id = await store.putMemory(makeMemory({ embedding: [] })); + + expect((await store.getMemory(id))!.embedding).toEqual([]); + }); + + it('stores observation embeddings as BLOBs and null stays null', async () => { + const store = new SqliteCortexStore(':memory:'); + const now = new Date(); + const base = { + content: 'obs', source_file: '', source_section: '', salience: 0.5, + processed: false, prediction_error: null, created_at: now, updated_at: now, + keywords: [], content_type: 'declarative' as const, + }; + const withEmb = await store.putObservation({ ...base, embedding: [0.1, 0.9] }); + const withoutEmb = await store.putObservation({ ...base, embedding: null }); + + const rows = getDb(store) + .prepare(`SELECT id, typeof(embedding) AS t FROM observations`) + .all() as { id: string; t: string }[]; + expect(rows.find(r => r.id === withEmb)!.t).toBe('blob'); + expect(rows.find(r => r.id === withoutEmb)!.t).toBe('null'); + + const obs = (await store.getUnprocessedObservations(10)).find(o => o.id === withEmb); + expect(obs!.embedding![1]).toBeCloseTo(0.9, 6); + }); +}); + +describe('legacy JSON-text embedding migration', () => { + 
it('converts JSON-text embeddings to BLOBs when an existing DB is opened', async () => { + const dir = mkdtempSync(join(tmpdir(), 'cortex-emb-')); + const dbPath = join(dir, 'test.db'); + try { + // Simulate a legacy row written by the pre-blob format. + const store1 = new SqliteCortexStore(dbPath); + insertMemoryWithRawEmbedding(store1, 'legacy', JSON.stringify([0.1, 0.2, 0.3])); + getDb(store1).close(); + + const store2 = new SqliteCortexStore(dbPath); + const row = getDb(store2) + .prepare(`SELECT typeof(embedding) AS t FROM memories WHERE id = 'legacy'`) + .get() as { t: string }; + expect(row.t).toBe('blob'); + + const memory = await store2.getMemory('legacy'); + expect(memory!.embedding).toHaveLength(3); + expect(memory!.embedding[0]).toBeCloseTo(0.1, 6); + + // Migrated rows must remain searchable. + const results = await store2.findNearest([0.1, 0.2, 0.3], 5); + expect(results.map(r => r.memory.id)).toContain('legacy'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('converts legacy observation embeddings and leaves null untouched', async () => { + const dir = mkdtempSync(join(tmpdir(), 'cortex-emb-')); + const dbPath = join(dir, 'test.db'); + try { + const store1 = new SqliteCortexStore(dbPath); + const now = new Date().toISOString(); + const insert = getDb(store1).prepare( + `INSERT INTO observations (id, content, created_at, updated_at, embedding) + VALUES (?, ?, ?, ?, ?)`, + ); + insert.run('legacy-obs', 'text emb', now, now, JSON.stringify([1, 0])); + insert.run('null-obs', 'no emb', now, now, null); + getDb(store1).close(); + + const store2 = new SqliteCortexStore(dbPath); + const rows = getDb(store2) + .prepare(`SELECT id, typeof(embedding) AS t FROM observations`) + .all() as { id: string; t: string }[]; + expect(rows.find(r => r.id === 'legacy-obs')!.t).toBe('blob'); + expect(rows.find(r => r.id === 'null-obs')!.t).toBe('null'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('is 
idempotent — reopening an already-converted DB changes nothing', async () => { + const dir = mkdtempSync(join(tmpdir(), 'cortex-emb-')); + const dbPath = join(dir, 'test.db'); + try { + const store1 = new SqliteCortexStore(dbPath); + const id = await store1.putMemory(makeMemory({ embedding: [0.7, 0.8] })); + getDb(store1).close(); + + const store2 = new SqliteCortexStore(dbPath); + const memory = await store2.getMemory(id); + expect(memory!.embedding[0]).toBeCloseTo(0.7, 6); + expect(memory!.embedding[1]).toBeCloseTo(0.8, 6); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/src/stores/sqlite.ts b/src/stores/sqlite.ts index abbe524..316c851 100644 --- a/src/stores/sqlite.ts +++ b/src/stores/sqlite.ts @@ -3,7 +3,9 @@ * * Uses better-sqlite3 for synchronous SQLite access wrapped in async interface. * Vector search uses brute-force cosine similarity (sufficient for <10k memories). - * Dates stored as ISO-8601 strings. Arrays stored as JSON text. + * Dates stored as ISO-8601 strings. Arrays stored as JSON text, except + * embeddings, which are raw Float32Array blobs (legacy JSON-text embeddings + * are converted in place at open time and still readable either way). */ import Database from 'better-sqlite3'; @@ -72,6 +74,15 @@ function parseEmbedding(data: string | Buffer | null): number[] { try { return JSON.parse(data) as number[]; } catch { return []; } } +/** + * Encode embedding as a raw Float32Array blob — ~4× smaller than JSON text + * and parse-free on read. Float64 inputs are truncated to float32; consumers + * comparing embeddings across backends must compare at float32 precision. + */ +function encodeEmbedding(embedding: number[] | null | undefined): Buffer { + return Buffer.from(new Float32Array(embedding ?? 
[]).buffer); +} + function prov(row: { prov_model_id?: string | null; prov_model_family?: string | null; prov_client?: string | null; prov_agent?: string | null }): ModelProvenance | undefined { if (!row.prov_model_id) return undefined; return { @@ -101,7 +112,7 @@ interface MemoryRow { interface ObservationRow { id: string; content: string; source_file: string; source_section: string; salience: number; processed: number; prediction_error: number | null; - created_at: string; updated_at: string; embedding: string | null; + created_at: string; updated_at: string; embedding: string | Buffer | null; keywords: string; content_type: string | null; prov_model_id: string | null; prov_model_family: string | null; prov_client: string | null; prov_agent: string | null; @@ -221,7 +232,7 @@ const SCHEMAS: Record = { category TEXT NOT NULL, salience REAL NOT NULL DEFAULT 0.5, confidence REAL NOT NULL DEFAULT 0.5, access_count INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, last_accessed TEXT NOT NULL, - source_files TEXT NOT NULL DEFAULT '[]', embedding TEXT NOT NULL DEFAULT '[]', + source_files TEXT NOT NULL DEFAULT '[]', embedding BLOB NOT NULL DEFAULT (x''), tags TEXT NOT NULL DEFAULT '[]', fsrs_stability REAL NOT NULL DEFAULT 3.1262, fsrs_difficulty REAL NOT NULL DEFAULT 7.2102, fsrs_reps INTEGER NOT NULL DEFAULT 0, fsrs_lapses INTEGER NOT NULL DEFAULT 0, @@ -235,7 +246,7 @@ const SCHEMAS: Record = { source_file TEXT NOT NULL DEFAULT '', source_section TEXT NOT NULL DEFAULT '', salience REAL NOT NULL DEFAULT 0.5, processed INTEGER NOT NULL DEFAULT 0, prediction_error REAL, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, - embedding TEXT, keywords TEXT NOT NULL DEFAULT '[]', + embedding BLOB, keywords TEXT NOT NULL DEFAULT '[]', content_type TEXT DEFAULT 'declarative', prov_model_id TEXT, prov_model_family TEXT, prov_client TEXT, prov_agent TEXT )`, @@ -329,6 +340,29 @@ export class SqliteCortexStore implements CortexStore { 
this.addColumn(this.t('memories'), 'last_retrieval_score REAL'); this.addColumn(this.t('memories'), 'last_hop_count INTEGER'); this.addColumn(this.t('memories'), 'memory_origin TEXT'); + this.migrateEmbeddingsToBlobs(); + } + + /** + * One-time conversion of legacy JSON-text embeddings to Float32Array blobs. + * Idempotent: only rows whose stored value is still text are touched, so + * re-opening an already-converted DB is a no-op scan. Column affinity does + * not matter here — SQLite stores blob values as blobs regardless of the + * declared column type, and typeof() reports the stored type. + */ + private migrateEmbeddingsToBlobs(): void { + for (const table of [this.t('memories'), this.t('observations')]) { + const rows = this.db.prepare( + `SELECT id, embedding FROM ${table} WHERE typeof(embedding) = 'text'` + ).all() as { id: string; embedding: string }[]; + if (rows.length === 0) continue; + const update = this.db.prepare(`UPDATE ${table} SET embedding = ? WHERE id = ?`); + this.db.transaction(() => { + for (const row of rows) { + update.run(encodeEmbedding(parseJSON(row.embedding, [])), row.id); + } + })(); + } } private addColumn(table: string, columnDef: string): void { @@ -414,7 +448,7 @@ export class SqliteCortexStore implements CortexStore { created_at: toISO(memory.created_at), updated_at: toISO(memory.updated_at), last_accessed: toISO(memory.last_accessed), source_files: JSON.stringify(memory.source_files ?? []), - embedding: JSON.stringify(memory.embedding ?? []), + embedding: encodeEmbedding(memory.embedding), tags: JSON.stringify(memory.tags ?? 
[]), fsrs_stability: memory.fsrs.stability, fsrs_difficulty: memory.fsrs.difficulty, fsrs_reps: memory.fsrs.reps, fsrs_lapses: memory.fsrs.lapses, @@ -450,7 +484,7 @@ export class SqliteCortexStore implements CortexStore { if (updates.updated_at !== undefined) { sets.push('updated_at = @ua'); vals.ua = updates.updated_at.toISOString(); } if (updates.last_accessed !== undefined) { sets.push('last_accessed = @la'); vals.la = updates.last_accessed.toISOString(); } if (updates.source_files !== undefined) { sets.push('source_files = @sf'); vals.sf = JSON.stringify(updates.source_files); } - if (updates.embedding !== undefined) { sets.push('embedding = @emb'); vals.emb = JSON.stringify(updates.embedding); } + if (updates.embedding !== undefined) { sets.push('embedding = @emb'); vals.emb = encodeEmbedding(updates.embedding); } if (updates.tags !== undefined) { sets.push('tags = @tags'); vals.tags = JSON.stringify(updates.tags); } if (updates.faded !== undefined) { sets.push('faded = @faded'); vals.faded = updates.faded ? 1 : 0; } if (updates.salience_original !== undefined) { sets.push('salience_original = @so'); vals.so = updates.salience_original; } @@ -564,7 +598,7 @@ export class SqliteCortexStore implements CortexStore { sal: obs.salience, proc: obs.processed ? 1 : 0, pe: obs.prediction_error ?? null, ca: toISO(obs.created_at), ua: toISO(obs.updated_at), - emb: obs.embedding ? JSON.stringify(obs.embedding) : null, + emb: obs.embedding ? encodeEmbedding(obs.embedding) : null, kw: JSON.stringify(obs.keywords ?? []), ct: obs.content_type ?? 'declarative', pmi: obs.provenance?.model_id ?? null, pmf: obs.provenance?.model_family ?? null, @@ -892,7 +926,7 @@ export class SqliteCortexStore implements CortexStore { created_at: toISO(memory.created_at), updated_at: toISO(memory.updated_at), last_accessed: toISO(memory.last_accessed), source_files: JSON.stringify(memory.source_files ?? []), - embedding: JSON.stringify(memory.embedding ?? 
[]), + embedding: encodeEmbedding(memory.embedding), tags: JSON.stringify(memory.tags ?? []), fsrs_stability: memory.fsrs.stability, fsrs_difficulty: memory.fsrs.difficulty, fsrs_reps: memory.fsrs.reps, fsrs_lapses: memory.fsrs.lapses, @@ -922,7 +956,7 @@ export class SqliteCortexStore implements CortexStore { sal: obs.salience, proc: obs.processed ? 1 : 0, pe: obs.prediction_error ?? null, ca: toISO(obs.created_at), ua: toISO(obs.updated_at), - emb: obs.embedding ? JSON.stringify(obs.embedding) : null, + emb: obs.embedding ? encodeEmbedding(obs.embedding) : null, kw: JSON.stringify(obs.keywords ?? []), ct: obs.content_type ?? 'declarative', pmi: obs.provenance?.model_id ?? null, pmf: obs.provenance?.model_family ?? null, From 3d8ac663b29531216168d8d3293c681cf845207f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 10 Jun 2026 21:16:01 +0000 Subject: [PATCH 5/5] fix(copilot): register context+feedback as core tools, fix flush shutdown, remove dead vars - context and feedback tools were gated behind namespace cognitive_tools config and not in CORE_TOOLS, so they never appeared in ListTools. Added both to CORE_TOOLS so they are always active like query/observe. - SIGTERM/SIGINT consolidator flush handler returned immediately, leaving the flush promise racing against process exit. Handlers now call process.exit(0) in the .finally() callback so the process stays alive until flush completes. beforeExit keeps the existing pattern (flush promise holds the event loop). - Removed two dead `now` variable declarations in context.ts L0/L1 handlers (elapsedDaysSince() computes its own reference time internally). 
https://claude.ai/code/session_01DAZ3GzRri9hqxkTyqmSpc4 --- src/mcp/server.ts | 18 +++++++++++++----- src/mcp/tools.ts | 2 ++ src/tools/context.ts | 4 ---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 8e97d8d..955dd0d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -142,11 +142,19 @@ export async function createContext(config: CortexConfig): Promise { consolidator.flush().catch(() => {}); }; - process.once('SIGTERM', consolidatorFlush); - process.once('SIGINT', consolidatorFlush); - process.once('beforeExit', consolidatorFlush); + // 7c. Flush pending observations to memory on shutdown. + // SIGTERM/SIGINT: flush then explicitly exit so the process doesn't + // terminate before the async flush completes (signal handlers return + // immediately; the pending promise alone is not enough to keep the + // process alive once stdio closes). + const consolidatorFlushAndExit = () => { + consolidator.flush().catch(() => {}).finally(() => process.exit(0)); + }; + process.once('SIGTERM', consolidatorFlushAndExit); + process.once('SIGINT', consolidatorFlushAndExit); + // beforeExit fires when the event loop is empty — work the flush schedules + // keeps the loop alive until complete, so no explicit exit call is needed. + process.once('beforeExit', () => { consolidator.flush().catch(() => {}); }); // 8. 
Filter active tools by namespace config + core set const activeToolNames = namespaces.getActiveTools(); diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index 8f19f3e..abafd69 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -312,4 +312,6 @@ export const CORE_TOOLS = [ 'ops_append', 'ops_query', 'ops_update', + 'context', + 'feedback', ] as const; diff --git a/src/tools/context.ts b/src/tools/context.ts index 0630d3a..c1b3025 100644 --- a/src/tools/context.ts +++ b/src/tools/context.ts @@ -66,7 +66,6 @@ export const contextTool: ToolDefinition = { if (tier === 'L0') { const rawEmbedding = await ctx.embed.embed(text); const candidates = await store.findNearest(rawEmbedding, 20); - const now = new Date(); const scored = candidates.map((r) => { const daysSince = r.memory.fsrs.last_review @@ -80,7 +79,6 @@ export const contextTool: ToolDefinition = { .sort((a, b) => b.score - a.score) .slice(0, 3); - void now; return { tier: 'L0', namespace: resolvedNs, @@ -103,7 +101,6 @@ export const contextTool: ToolDefinition = { const nearest = await store.findNearest(embedding, 15); - const now = new Date(); const results = await Promise.all( nearest.map(async (r) => { const daysSince = r.memory.fsrs.last_review @@ -124,7 +121,6 @@ export const contextTool: ToolDefinition = { }), ); - void now; const sorted = results .sort((a, b) => b.compositeScore - a.compositeScore);