From 1f141ff77ed099ca4943b3317bca837e6fe9cf31 Mon Sep 17 00:00:00 2001 From: AutoDev Agent Date: Wed, 3 Jun 2026 19:20:15 +0800 Subject: [PATCH] fix: exclude short traces from embedding maintenance stats - Add shouldTraceHaveEmbeddings() helper to filter traces with insufficient content - Skip traces where both user_text and agent_text are under 10 chars - Skip traces where total combined length is under 20 chars - Fixes misleading 'missing' count after bulk import (issue #1746) - Applies filter consistently to stats computation and repair operations --- .../core/pipeline/memory-core.ts | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/apps/memos-local-plugin/core/pipeline/memory-core.ts b/apps/memos-local-plugin/core/pipeline/memory-core.ts index 4622a35c3..cfb37f64e 100644 --- a/apps/memos-local-plugin/core/pipeline/memory-core.ts +++ b/apps/memos-local-plugin/core/pipeline/memory-core.ts @@ -4076,12 +4076,36 @@ export function createMemoryCore( return bestDim; } + function shouldTraceHaveEmbeddings(row: TraceRow): boolean { + // Returns false for traces whose whitespace-trimmed user/agent text is too short (see the two checks that follow) + const userLen = row.userText.trim().length; + const agentLen = row.agentText.trim().length; + + // If both are under 10 chars, definitely skip (the combined-length check that follows also catches this case: at most 9 + 9 = 18 < 20) + if (userLen < 10 && agentLen < 10) { + return false; + } + + // If total combined length is under 20 chars, skip + // (covers cases like "ok" / "Got it, done." -- 2 + 13 = 15 chars combined -- which aren't meaningful memories) + if (userLen + agentLen < 20) { + return false; + } + + return true; + } + function collectEmbeddingSlots(): EmbeddingSlot[] { const slots: EmbeddingSlot[] = []; const pageSize = 500; for (let offset = 0;; offset += pageSize) { const rows = handle.repos.traces.list({ limit: pageSize, offset, newestFirst: false }); for (const row of rows) { + // Skip traces rejected by shouldTraceHaveEmbeddings() so that no slot is pushed for them + if (!shouldTraceHaveEmbeddings(row)) { + continue; + } + slots.push({ kind: "trace", id: row.id,