MemTensor · Memtensor-AI · Jun 15, 2026
diff --git a/apps/memos-local-plugin/core/retrieval/llm-filter.ts b/apps/memos-local-plugin/core/retrieval/llm-filter.ts
@@ -24,12 +24,32 @@ import type { LlmClient } from "../llm/index.js";
 import type { Logger } from "../logger/types.js";
 import { RETRIEVAL_FILTER_PROMPT } from "../llm/prompts/index.js";
 import type { RankedCandidate } from "./ranker.js";
-import type { RetrievalConfig } from "./types.js";
+import type { RetrievalConfig, TraceCandidate } from "./types.js";
 
 const DEFAULT_CANDIDATE_BODY_CHARS = 500;
 const MIN_FILTER_OUTPUT_TOKENS = 160;
 const MAX_FILTER_OUTPUT_TOKENS = 2048;
 
+/**
+ * A trace whose `agentText` falls under this length, with no LLM summary
+ * or reflection to back it up, is treated as a near-duplicate question
+ * trace (issue #1913). The rescue path keeps these *behind* informative
+ * candidates so the answer-bearing trace surfaces first.
+ */
+const INFORMATIVE_AGENT_TEXT_MIN_CHARS = 20;
+
+/**
+ * Short acknowledgement / scaffold replies that the filter prompt
+ * rightly classes as "scaffolding chatter". When the LLM filter empties
+ * the kept set we still need to make a rescue call — these strings let
+ * us prefer informative replies over plain acks. Bounded list, exact
+ * matches only after trimming surrounding punctuation / whitespace.
+ */
+const SHORT_ACK_PATTERNS: readonly RegExp[] = [
+  /^(ok|okay|sure|got it|noted|understood|alright|will do|copy|copy that|thanks|thank you|✓|✅|👍)[\s.!]*$/i,
+  /^(记住了|已记住|已经记住|好的|明白|收到|了解|谢谢)[\s。!]*$/,
+];
+
 export interface FilterInput {
   query: string;
   ranked: readonly RankedCandidate[];
@@ -70,6 +90,12 @@ export interface FilterResult {
     | "deferred_to_final"
     | "llm_kept_all"
     | "llm_filtered"
+    // The LLM returned an empty selection over a non-empty ranked list
+    // (issue #1913 — repeated question traces crowding the hit set).
+    // We rescued the top-K best-scoring candidates so the agent always
+    // sees a packet when retrieval succeeded; `sufficient` is forced
+    // to `false` so downstream callers know the injection is weak.
+    | "llm_filtered_refilled"
     // The LLM was supposed to run but the call failed / parsed badly.
     // We applied a mechanical relevance cutoff (top-K above
     // `relativeThresholdFloor · topRelevance`) instead of dumping the
@@ -169,15 +195,18 @@ ${list}`,
     );
     const keepIndices = new Set(cappedIndices);
     if (keepIndices.size === 0) {
-      // Model asked us to drop everything — honoured. Surface this
-      // explicitly so the Logs page can show "LLM found nothing
-      // relevant" instead of silently injecting a partial packet.
-      return {
-        kept: [],
-        dropped: [...ranked],
-        outcome: "llm_filtered",
-        sufficient: sufficient ?? false,
-      };
+      // Issue #1913: the model asked us to drop everything. Honouring
+      // that verbatim used to collapse `turn.start` injection to "" even
+      // when retrieval was healthy — the failure mode is a hit set
+      // dominated by near-duplicate question traces from prior
+      // sessions, where each candidate individually looks like
+      // "surface-similar wrong sub-problem" to the filter prompt.
+      // Instead, rescue the top-K best-scoring candidates (preferring
+      // informative traces over pure-question / ack-only chatter) so
+      // the agent always sees a packet when retrieval succeeded.
+      // `safeCutoff`'s sibling escape hatch (`llmFilterMaxKeep === 0`)
+      // is honoured so operators can still ask for hard drop.
+      return rescueFromEmptySelection(ranked, deps, sufficient);
     }
     const kept = cappedIndices.map((i) => ranked[i]!);
     const dropped: RankedCandidate[] = [];
@@ -214,6 +243,95 @@ function passthrough(
   return { kept: [...ranked], dropped: [], outcome, sufficient: null };
 }
 
+/**
+ * Issue #1913 rescue path. Invoked when the LLM relevance filter
+ * returned `selected: []` for a *non-empty* ranked candidate list — the
+ * most common cause is a hit set dominated by near-duplicate question
+ * traces from previous sessions, where the filter prompt's "drop
+ * scaffolding chatter" / "drop surface-similar wrong sub-problem"
+ * rubric is applied to every candidate.
+ *
+ * Strategy: keep the top-K best-scoring candidates, preferring
+ * informative traces (skill / episode / experience / world-model, or a
+ * trace whose `agentText`/`summary`/`reflection` carries real content)
+ * over pure-question chatter. We do NOT re-query the LLM — the rescue
+ * is a single O(n) partition + slice. Outcome label is
+ * `"llm_filtered_refilled"` so the Logs viewer can show "LLM collapsed,
+ * safety net fired" distinct from a normal `"llm_filtered"`.
+ *
+ * Escape hatch: `llmFilterMaxKeep === 0` skips the rescue entirely and
+ * honours the "drop everything" request (matches existing `safeCutoff`
+ * semantics for the same config value).
+ */
+function rescueFromEmptySelection(
+  ranked: readonly RankedCandidate[],
+  deps: FilterDeps,
+  sufficient: boolean | null,
+): FilterResult {
+  const keepCap = Math.max(0, deps.config.llmFilterMaxKeep);
+  if (keepCap === 0 || ranked.length === 0) {
+    return {
+      kept: [],
+      dropped: [...ranked],
+      outcome: "llm_filtered",
+      sufficient: sufficient ?? false,
+    };
+  }
+  const informative: RankedCandidate[] = [];
+  const chatter: RankedCandidate[] = [];
+  for (const r of ranked) {
+    if (isInformativeCandidate(r)) informative.push(r);
+    else chatter.push(r);
+  }
+  // Preserve ranker order within each bucket; informative first so the
+  // answer-bearing trace surfaces even when the ranker placed it below
+  // surface-similar question traces.
+  const ordered = [...informative, ...chatter];
+  const kept = ordered.slice(0, Math.min(keepCap, ordered.length));
+  const keptSet = new Set(kept);
+  const dropped = ranked.filter((r) => !keptSet.has(r));
+  deps.log.debug("llm_filter.collapsed_refill", {
+    ranked: ranked.length,
+    rescued: kept.length,
+    informative: informative.length,
+    chatter: chatter.length,
+    filteredAll: true,
+  });
+  return {
+    kept,
+    dropped,
+    outcome: "llm_filtered_refilled",
+    sufficient: sufficient ?? false,
+  };
+}
+
+/**
+ * Returns true when a ranked candidate carries content the agent can
+ * actually use. Skills, episodes, experiences, and world-models always
+ * count. Traces count when their `summary` or `reflection` is non-empty
+ * or their `agentText` is longer than a short acknowledgement.
+ *
+ * Used by the rescue path (and intentionally only there) to bias the
+ * rescued set toward traces with informative assistant text. False
+ * negatives (an informative trace mistakenly labelled chatter) still
+ * get rescued because they sit in the second half of the ordered list.
+ */
+function isInformativeCandidate(r: RankedCandidate): boolean {
+  const c = r.candidate;
+  if (c.refKind !== "trace") return true;
+  const t = c as TraceCandidate;
+  if ((t.summary?.trim().length ?? 0) > 0) return true;
+  if ((t.reflection?.trim().length ?? 0) > 0) return true;
+  const agent = t.agentText?.trim() ?? "";
+  if (agent.length === 0) return false;
+  if (isShortAck(agent)) return false;
+  return agent.length >= INFORMATIVE_AGENT_TEXT_MIN_CHARS;
+}
+
+function isShortAck(text: string): boolean {
+  return SHORT_ACK_PATTERNS.some((re) => re.test(text));
+}
+
 /**
  * Mechanical fail-closed: when the LLM is unavailable / errored,
  * apply a relative-relevance cutoff so we don't dump the entire ranked

diff --git a/apps/memos-local-plugin/core/retrieval/types.ts b/apps/memos-local-plugin/core/retrieval/types.ts
@@ -737,6 +737,7 @@ export interface RetrievalStats {
     | "deferred_to_final"
     | "llm_kept_all"
     | "llm_filtered"
+    | "llm_filtered_refilled"
     | "llm_failed_safe_cutoff";
   llmFilterSufficient?: boolean;
   llmFilterKept?: number;

diff --git a/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/integration.test.ts
@@ -402,6 +402,45 @@ describe("retrieval/integration", () => {
     expect(res.stats.emptyPacket).toBe(false);
   });
 
+  it("turn_start rescues injection when LLM filter empties the kept set (#1913)", async () => {
+    // Repro for issue #1913: when the LLM relevance filter returns
+    // `selected: []` for a non-empty ranked list (the case where the
+    // top hits are all near-duplicate question traces), the packet
+    // used to collapse to an empty injection. The rescue path keeps
+    // the top-K best-scoring candidates so the agent still gets a
+    // packet, and surfaces `llm_filtered_refilled` so the Logs viewer
+    // can show the safety net fired.
+    const llm: any = {
+      completeJson: async () => ({
+        value: { selected: [], sufficient: false },
+        servedBy: "fake",
+      }),
+    };
+    const res = await turnStartRetrieve(
+      {
+        ...makeDeps(handle),
+        llm,
+        config: {
+          ...makeDeps(handle).config,
+          llmFilterEnabled: true,
+          llmFilterMinCandidates: 1,
+        },
+      },
+      {
+        reason: "turn_start",
+        agent: "openclaw",
+        sessionId: "s_current" as SessionId,
+        userText: "run docker compose",
+        ts: NOW as never,
+      },
+    );
+
+    expect(res.packet.snippets.length).toBeGreaterThan(0);
+    expect(res.packet.rendered.length).toBeGreaterThan(0);
+    expect(res.stats.llmFilterOutcome).toBe("llm_filtered_refilled");
+    expect(res.stats.emptyPacket).toBe(false);
+  });
+
   it("skill_invoke is tier1-heavy", async () => {
     const res = await skillInvokeRetrieve(makeDeps(handle), {
       reason: "skill_invoke",

diff --git a/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts b/apps/memos-local-plugin/tests/unit/retrieval/llm-filter.test.ts
@@ -57,6 +57,33 @@ function trace(id: string, score: number): RankedCandidate {
   };
 }
 
+/**
+ * Build a trace candidate with no LLM-generated summary / reflection
+ * and only a short acknowledgement-style `agentText` — i.e. exactly the
+ * "near-duplicate question trace" shape from issue #1913 where the
+ * user re-asked a stored fact across multiple sessions and the
+ * assistant only acked it. The current LLM filter prompt classes these
+ * as "scaffolding chatter" and is allowed to drop them all.
+ */
+function chatterTrace(
+  id: string,
+  score: number,
+  overrides: { agentText?: string; userText?: string } = {},
+): RankedCandidate {
+  const r = trace(id, score);
+  const cand = r.candidate as TraceCandidate;
+  return {
+    ...r,
+    candidate: {
+      ...cand,
+      userText: overrides.userText ?? `What does HERMES_REAL_E2E_1910 mean? (${id})`,
+      agentText: overrides.agentText ?? "OK",
+      summary: null,
+      reflection: null,
+    },
+  };
+}
+
 describe("retrieval/llm-filter", () => {
   it("disabled → passthrough with null sufficient", async () => {
     const result = await llmFilterCandidates(
@@ -135,7 +162,100 @@ describe("retrieval/llm-filter", () => {
     expect(result.sufficient).toBe(true);
   });
 
-  it("LLM returns empty selection → drops everything and marks insufficient", async () => {
+  it("LLM returns empty selection over non-empty ranked → rescues top-K informative candidates (#1913)", async () => {
+    // Issue #1913 repro shape: 3 near-duplicate question traces from
+    // previous sessions plus 1 answer-bearing trace. Previous filter
+    // honoured `selected: []` and collapsed to empty injection.
+    const llm: any = {
+      completeJson: vi.fn().mockResolvedValue({
+        value: { selected: [], sufficient: false },
+        servedBy: "fake",
+      }),
+    };
+    const ranked = [
+      chatterTrace("q1", 0.95),
+      chatterTrace("q2", 0.94),
+      // Answer-bearing trace: long informative agentText, even though
+      // the ranker placed it below the question duplicates.
+      (() => {
+        const r = trace("answer", 0.85);
+        const c = r.candidate as TraceCandidate;
+        return {
+          ...r,
+          candidate: {
+            ...c,
+            userText:
+              "Remember this fact: HERMES_REAL_E2E_1910 means the bridge leak fix verification.",
+            agentText: "Noted. HERMES_REAL_E2E_1910 → bridge leak fix verification.",
+            summary:
+              "HERMES_REAL_E2E_1910 marks the bridge-leak fix verification.",
+            reflection: null,
+          },
+        };
+      })(),
+      chatterTrace("q3", 0.8),
+    ];
+    const result = await llmFilterCandidates(
+      { query: "What does HERMES_REAL_E2E_1910 mean?", ranked },
+      { llm, log, config: cfg },
+    );
+    expect(result.outcome).toBe("llm_filtered_refilled");
+    expect(result.kept.length).toBeGreaterThanOrEqual(1);
+    expect(result.kept[0]!.candidate.refId).toBe("answer");
+    expect(result.sufficient).toBe(false);
+    // Strong negative assertion: the bug returned kept=[], dropped=ranked.
+    expect(result.dropped.length).toBeLessThan(ranked.length);
+  });
+
+  it("rescue fires even when every ranked candidate is short-ack chatter", async () => {
+    const llm: any = {
+      completeJson: vi.fn().mockResolvedValue({
+        value: { selected: [], sufficient: false },
+        servedBy: "fake",
+      }),
+    };
+    const ranked = [
+      chatterTrace("q1", 0.9, { agentText: "OK" }),
+      chatterTrace("q2", 0.85, { agentText: "记住了" }),
+      chatterTrace("q3", 0.8, { agentText: "👍" }),
+    ];
+    const result = await llmFilterCandidates(
+      { query: "What does HERMES_REAL_E2E_1910 mean?", ranked },
+      { llm, log, config: cfg },
+    );
+    expect(result.outcome).toBe("llm_filtered_refilled");
+    // No informative candidate exists — rescue still keeps top-K by
+    // ranker score so the agent at least sees one memory.
+    expect(result.kept.length).toBeGreaterThanOrEqual(1);
+    expect(result.kept[0]!.candidate.refId).toBe("q1"); // highest score
+    expect(result.sufficient).toBe(false);
+  });
+
+  it("rescue respects llmFilterMaxKeep cap", async () => {
+    const llm: any = {
+      completeJson: vi.fn().mockResolvedValue({
+        value: { selected: [], sufficient: false },
+        servedBy: "fake",
+      }),
+    };
+    const ranked = [
+      trace("a", 0.95),
+      trace("b", 0.9),
+      trace("c", 0.85),
+      trace("d", 0.8),
+    ];
+    const result = await llmFilterCandidates(
+      { query: "q", ranked },
+      { llm, log, config: { ...cfg, llmFilterMaxKeep: 2 } },
+    );
+    expect(result.outcome).toBe("llm_filtered_refilled");
+    expect(result.kept.length).toBe(2);
+    expect(result.dropped.length).toBe(2);
+  });
+
+  it("llmFilterMaxKeep=0 disables rescue: honours empty selection (drop-everything override)", async () => {
+    // This is the only configured way to ask the filter to truly drop
+    // everything; keep it explicit so operators have an escape hatch.
     const llm: any = {
       completeJson: vi.fn().mockResolvedValue({
         value: { selected: [], sufficient: false },
@@ -145,14 +265,30 @@ describe("retrieval/llm-filter", () => {
     const ranked = [trace("a", 0.9), trace("b", 0.8)];
     const result = await llmFilterCandidates(
       { query: "q", ranked },
-      { llm, log, config: cfg },
+      { llm, log, config: { ...cfg, llmFilterMaxKeep: 0 } },
     );
     expect(result.outcome).toBe("llm_filtered");
     expect(result.kept.length).toBe(0);
     expect(result.dropped.length).toBe(2);
     expect(result.sufficient).toBe(false);
   });
 
+  it("LLM returns empty selection with empty ranked list → unchanged (still llm_filtered, kept=[])", async () => {
+    const llm: any = {
+      completeJson: vi.fn().mockResolvedValue({
+        value: { selected: [], sufficient: false },
+        servedBy: "fake",
+      }),
+    };
+    // ranked empty but minCandidates=0 so the filter still runs
+    const result = await llmFilterCandidates(
+      { query: "q", ranked: [] },
+      { llm, log, config: { ...cfg, llmFilterMinCandidates: 0 } },
+    );
+    expect(result.outcome).toBe("below_threshold");
+    expect(result.kept.length).toBe(0);
+  });
+
   it("coerces string / number `sufficient` fields sent by lax models", async () => {
     const llm: any = {
       completeJson: vi.fn().mockResolvedValue({