From 9b33766478dd05605904bfaae8b4c605e9113c00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C4=93sa=20AI?= Date: Fri, 24 Apr 2026 13:01:40 -0700 Subject: [PATCH] fix(memory-core): stream seedEmbeddingCache via iterate to prevent V8 heap OOM The embedding_cache table sync in MemoryManager.seedEmbeddingCache called .all() on an unfiltered, explicit-column SELECT over the whole embedding_cache table, materializing the full result set into a JS array. embedding_cache rows contain serialized embedding text (~20 KB each on text-embedding-3-small) and can grow into hundreds of thousands of rows on long-running deployed databases. On a local 16 GB main.sqlite (435,136 rows, 8.68 GB of embedding text), the .all() call exceeds V8's ~4 GB default heap limit and aborts the gateway with: FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory ... node::sqlite::StatementSync::All ... Switching .all() -> .iterate() streams rows one at a time through the same BEGIN/COMMIT upsert transaction. Peak V8 heap stays bounded by a single row (~20 KB) plus the prepared statement, not the whole table. Also drops the empty-check on the materialized array's .length; an empty iterator commits a no-op transaction, which is cheap and preserves the observable behavior for empty caches. Scope note: this is the primary R2.A target (seedEmbeddingCache); a follow-up patch will address the secondary listChunks / keyword fallback .all() path in manager-search.ts. 
Validation: - pnpm tsgo:prod: green (core + extensions graphs) - pnpm test extensions/memory-core: 512 passed, 3 skipped, 0 failed --- extensions/memory-core/src/memory/manager-sync-ops.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts index 5985fa3ef013..1b82f0c8f770 100644 --- a/extensions/memory-core/src/memory/manager-sync-ops.ts +++ b/extensions/memory-core/src/memory/manager-sync-ops.ts @@ -304,7 +304,7 @@ export abstract class MemoryManagerSyncOps { .prepare( `SELECT provider, model, provider_key, hash, embedding, dims, updated_at FROM ${EMBEDDING_CACHE_TABLE}`, ) - .all() as Array<{ + .iterate() as IterableIterator<{ provider: string; model: string; provider_key: string; @@ -313,9 +313,7 @@ export abstract class MemoryManagerSyncOps { dims: number | null; updated_at: number; }>; - if (!rows.length) { - return; - } + // Note: no early-return on empty iterator; BEGIN/COMMIT over an empty tx is cheap. const insert = this.db.prepare( `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)