11 changes: 11 additions & 0 deletions README.md
@@ -63,6 +63,14 @@ The embedding module now uses raw `fetch` instead of the `openai` SDK, making it
- Ollama, llama.cpp, vLLM (local models)
- Any endpoint that implements `POST /embeddings`

### Operational safeguards

Three small safeguards make graph-memory cheaper and safer to run in busy OpenClaw deployments:

- **Readonly subagent/helper sessions**: subagents and short-lived helper sessions can still inherit recall context, but they no longer write noisy long-term memory into the shared graph. This keeps maintenance focused on human-facing sessions instead of ephemeral worker chatter.
- **Permanent-error LLM cooldown**: repeated `400/401/403/404/422` failures from `config.llm` now trigger a temporary cooldown instead of hammering the provider every turn. This turns broken credentials or disabled accounts into a contained failure instead of runaway token spend.
- **Community summary reuse**: community summaries are now keyed by member signatures. If a community has not changed, graph-memory skips regeneration; if the same member set reappears under a different community id, it reuses the cached summary and embedding. This cuts unnecessary summary LLM calls without changing recall quality.
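
The member-signature idea can be sketched in a few lines of TypeScript (a hedged illustration — `signatureFor`, `summaryCache`, and `summarizeCommunity` are hypothetical names, not the plugin's actual internals):

```typescript
// Hypothetical sketch of summary reuse keyed by member signature.
// Sorting member ids makes the cache key independent of member order
// and of the (unstable) community id.
type CachedSummary = { summary: string; embedding: number[] };

const summaryCache = new Map<string, CachedSummary>();

function signatureFor(memberIds: string[]): string {
  return [...memberIds].sort().join("|");
}

async function summarizeCommunity(
  memberIds: string[],
  generate: (ids: string[]) => Promise<CachedSummary>, // the summary LLM + embed call
): Promise<CachedSummary> {
  const sig = signatureFor(memberIds);
  const cached = summaryCache.get(sig);
  if (cached) return cached; // unchanged member set → skip the LLM entirely
  const fresh = await generate(memberIds);
  summaryCache.set(sig, fresh);
  return fresh;
}
```

Under a scheme like this, a community that merely gets renumbered between maintenance runs hits the cache, so only genuinely changed communities cost an LLM call.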

### Windows one-click installer

v2.0 ships a **Windows installer** (`.exe`). Download from [Releases](https://github.com/adoresever/graph-memory/releases):
@@ -144,6 +152,7 @@ assemble (zero LLM)
afterTurn (async, non-blocking)
├─ LLM extracts triples → gm_nodes + gm_edges
├─ Every 7 turns: PageRank + community detection + community summaries
│ └─ unchanged communities reuse cached summaries/embeddings
└─ User sends new message → extraction auto-interrupted

session_end
@@ -152,6 +161,7 @@

Next session → before_prompt_build
├─ Dual-path recall (precise + generalized)
├─ Subagent/helper sessions stay recall-only
└─ Personalized PageRank ranking → inject into context
```
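
The recall-only behavior for subagents hinges on a small session registry. A minimal sketch of what `ReadonlySessionRegistry` could look like (only the method names come from the index.ts diff in this PR; the internals here are assumed):

```typescript
// Assumed-minimal registry: a Set of session keys marked read-only.
// Method names mirror the calls made from index.ts; internals are guessed.
class ReadonlySessionRegistry {
  private keys = new Set<string>();
  markReadonly(sessionKey?: string): void {
    if (sessionKey) this.keys.add(sessionKey);
  }
  has(sessionKey?: string): boolean {
    return sessionKey != null && this.keys.has(sessionKey);
  }
  clear(sessionKey?: string): void {
    if (sessionKey) this.keys.delete(sessionKey);
  }
  clearAll(): void {
    this.keys.clear();
  }
}

// A spawned subagent is marked read-only: recall still flows to it,
// but ingest/compact/afterTurn bail out early for its key.
const registry = new ReadonlySessionRegistry();
registry.markReadonly("subagent:abc");
console.log(registry.has("subagent:abc")); // true
console.log(registry.has("main-session")); // false
```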

@@ -323,6 +333,7 @@ sqlite3 ~/.openclaw/graph-memory.db "SELECT id, summary FROM gm_communities;"
| `recall` works but `gm_messages` is empty | `plugins.slots.contextEngine` not set | Add `"contextEngine": "graph-memory"` to `plugins.slots` |
| `FTS5 search mode` instead of `vector search ready` | Embedding not configured or API key invalid | Check `config.embedding` credentials |
| `No LLM available` error | LLM config missing after plugin reinstall | Re-add `config.llm` to `plugins.entries.graph-memory` |
| Repeated `LLM API 403/404/422` errors | Broken account, credentials, or provider-side permanent failure | Fix the provider config; graph-memory now enters a temporary cooldown instead of retrying every turn |
| No `extracted` log after `afterTurn` | Gateway restart caused turn_index overlap | Update to v2.0 (fixes msgSeq persistence) |
| `content.filter is not a function` | OpenClaw expects array content | Update to v2.0 (adds content normalization) |
| Nodes are empty after many messages | `compactTurnCount` not reached | Default is 7 messages. Keep chatting or set a lower value |
11 changes: 11 additions & 0 deletions README_CN.md
@@ -65,6 +65,14 @@ The embedding module now uses native `fetch` in place of the `openai` SDK, compatible out of the box with **
- Ollama, llama.cpp, vLLM (local models)
- Any endpoint that implements `POST /embeddings`

### Operational safeguards

Three safeguards make graph-memory cheaper to run and harder to destabilize in complex OpenClaw deployments:

- **Read-only subagent/helper sessions**: subagents and short-lived helper sessions can still inherit recall results, but they no longer write noise into the shared graph. Long-term memory stays focused on the user-facing main session rather than the intermediate chatter of short-lived workers.
- **Cooldown on permanent LLM errors**: if `config.llm` keeps returning permanent errors such as `400/401/403/404/422`, graph-memory enters a short cooldown instead of hammering the provider every turn. Instead of silently burning tokens, the failure becomes an explicit, recoverable signal.
- **Community summary dedup and reuse**: community summaries are now cached by member signature. If the members have not changed, recomputation is skipped; if the same member set reappears under a different community id, the existing summary and embedding are reused. This sharply reduces redundant summary LLM calls without affecting recall quality.

### Windows one-click installer

v2.0 ships a **Windows installer** (`.exe`). Download it from the [Releases](https://github.com/adoresever/graph-memory/releases) page:
@@ -146,6 +154,7 @@ assemble (zero LLM)
afterTurn (async in the background, does not block the conversation)
├─ LLM extracts triples → gm_nodes + gm_edges
├─ Every 7 turns: PageRank + community detection + community summary generation
│   └─ unchanged communities reuse existing summaries/embeddings
└─ User sends a new message → extraction auto-interrupted

session_end
@@ -154,6 +163,7 @@

Next session → before_prompt_build
├─ Dual-path recall (precise + generalized)
├─ Subagent/helper sessions consume recall read-only
└─ Personalized PageRank ranking → inject into context
```

@@ -325,6 +335,7 @@ sqlite3 ~/.openclaw/graph-memory.db "SELECT id, summary FROM gm_communities;"
| `recall` works but `gm_messages` is empty | `plugins.slots.contextEngine` not set | Add `"contextEngine": "graph-memory"` to `plugins.slots` |
| Shows `FTS5 search mode` | Embedding not configured or API key invalid | Check the `config.embedding` key and endpoint |
| `No LLM available` error | LLM config lost after plugin reinstall | Re-add `config.llm` to `plugins.entries.graph-memory` |
| Repeated `LLM API 403/404/422` errors | Permanent failure in the account, credentials, or on the provider side | Fix the provider config; graph-memory now enters a temporary cooldown instead of retrying every turn |
| No `extracted` log after `afterTurn` | Restart caused turn_index overlap | Upgrade to v2.0 (fixes msgSeq persistence) |
| `content.filter is not a function` | OpenClaw expects content to be an array | Upgrade to v2.0 (adds content normalization) |
| Nodes stay empty after many turns | Message count has not reached the extraction threshold | Messages need to accumulate by default. Keep chatting or lower `compactTurnCount` |
58 changes: 49 additions & 9 deletions index.ts
@@ -28,6 +28,7 @@ import { sanitizeToolUseResultPairing } from "./src/format/transcript-repair.ts"
import { runMaintenance } from "./src/graph/maintenance.ts";
import { invalidateGraphCache, computeGlobalPageRank } from "./src/graph/pagerank.ts";
import { detectCommunities } from "./src/graph/community.ts";
import { ReadonlySessionRegistry } from "./src/session-policy.ts";
import { DEFAULT_CONFIG, type GmConfig } from "./src/types.ts";

// ─── Read provider/model from the OpenClaw config ────────────────────
@@ -158,10 +159,24 @@ const graphMemoryPlugin = {
const msgSeq = new Map<string, number>();
const recalled = new Map<string, { nodes: any[]; edges: any[] }>();
const turnCounter = new Map<string, number>(); // community maintenance counter
const readonlySessions = new ReadonlySessionRegistry();

// ── Extraction serialization (per-session promise chain; different sessions run in parallel) ──
const extractChain = new Map<string, Promise<void>>();

function isReadonlySession(sessionKey?: string): boolean {
return readonlySessions.has(sessionKey);
}

function cleanupSessionState(sessionKey: string | undefined, forgetReadonly = false): void {
if (!sessionKey) return;
extractChain.delete(sessionKey);
msgSeq.delete(sessionKey);
recalled.delete(sessionKey);
turnCounter.delete(sessionKey);
if (forgetReadonly) readonlySessions.clear(sessionKey);
}

/** Persist one message to gm_messages (synchronous, zero LLM) */
function ingestMessage(sessionId: string, message: any): void {
let seq = msgSeq.get(sessionId);
@@ -245,6 +260,7 @@
if (prompt.includes("/new or /reset") || prompt.includes("new session was started")) return;

const sid = ctx?.sessionId ?? ctx?.sessionKey;
if (isReadonlySession(sid)) return;

api.logger.info(`[graph-memory] recall query: "${prompt.slice(0, 80)}"`);

@@ -286,6 +302,7 @@
isHeartbeat?: boolean;
}) {
if (isHeartbeat) return { ingested: false };
if (isReadonlySession(sessionId)) return { ingested: false };
ingestMessage(sessionId, message);
return { ingested: true };
},
@@ -301,7 +318,7 @@
tokenBudget?: number;
prompt?: string; // Added in OpenClaw 2026.03.28: prompt-aware retrieval
}) {
const activeNodes = getBySession(db, sessionId);
const activeNodes = isReadonlySession(sessionId) ? [] : getBySession(db, sessionId);
const activeEdges = activeNodes.flatMap((n) => [
...edgesFrom(db, n.id),
...edgesTo(db, n.id),
@@ -378,6 +395,10 @@
force?: boolean;
currentTokenCount?: number;
}) {
if (isReadonlySession(sessionId)) {
return { ok: true, compacted: false, reason: "readonly session" };
}

// compact is kept as a fallback, but the main extraction happens in afterTurn
const msgs = getUnextracted(db, sessionId, 50);

@@ -444,6 +465,7 @@
tokenBudget?: number;
}) {
if (isHeartbeat) return;
if (isReadonlySession(sessionId)) return;

// Messages are already persisted by ingest() — only slice to
// determine the new-message count for extraction triggering.
@@ -503,20 +525,26 @@
parentSessionKey: string;
childSessionKey: string;
}) {
readonlySessions.markReadonly(childSessionKey);
const rec = recalled.get(parentSessionKey);
if (rec) recalled.set(childSessionKey, rec);
return { rollback: () => { recalled.delete(childSessionKey); } };
return {
rollback: () => {
cleanupSessionState(childSessionKey, true);
},
};
},

async onSubagentEnded({ childSessionKey }: { childSessionKey: string }) {
recalled.delete(childSessionKey);
msgSeq.delete(childSessionKey);
cleanupSessionState(childSessionKey, true);
},

async dispose() {
extractChain.clear();
msgSeq.clear();
recalled.clear();
turnCounter.clear();
readonlySessions.clearAll();
},
};

@@ -533,6 +561,8 @@
if (!sid) return;

try {
if (isReadonlySession(sid)) return;

const nodes = getBySession(db, sid);
if (nodes.length) {
const summary = (
@@ -581,10 +611,7 @@
} catch (err) {
api.logger.error(`[graph-memory] session_end error: ${err}`);
} finally {
extractChain.delete(sid);
msgSeq.delete(sid);
recalled.delete(sid);
turnCounter.delete(sid);
cleanupSessionState(sid, true);
}
});

@@ -651,6 +678,12 @@
p: { name: string; type: string; description: string; content: string; relatedSkill?: string },
) {
const sid = ctx?.sessionKey ?? ctx?.sessionId ?? "manual";
if (isReadonlySession(sid)) {
return {
content: [{ type: "text", text: "subagent session is running in read-only graph-memory mode." }],
details: { readonly: true, sessionKey: sid },
};
}
const { node } = upsertNode(db, {
type: p.type as any, name: p.name,
description: p.description, content: p.content,
@@ -704,12 +737,19 @@
);

api.registerTool(
(_ctx: any) => ({
(ctx: any) => ({
name: "gm_maintain",
label: "Graph Memory Maintenance",
description: "Manually trigger graph maintenance: dedup, PageRank recomputation, and community detection. This normally runs automatically at session_end; use this tool to trigger it manually.",
parameters: Type.Object({}),
async execute(_toolCallId: string, _params: any) {
const sid = ctx?.sessionKey ?? ctx?.sessionId;
if (isReadonlySession(sid)) {
return {
content: [{ type: "text", text: "subagent session is running in read-only graph-memory mode." }],
details: { readonly: true, sessionKey: sid },
};
}
const embedFn = (recaller as any).embed ?? undefined;
const result = await runMaintenance(db, cfg, llm, embedFn);
const text = [
39 changes: 39 additions & 0 deletions src/engine/llm-guard.ts
@@ -0,0 +1,39 @@
const RETRYABLE_STATUSES = new Set([429, 500, 502, 503, 529]);
const PAUSING_STATUSES = new Set([400, 401, 403, 404, 422]);

export function extractLlmStatus(error: unknown): number | null {
const text = String(error ?? "");
const match = text.match(/\bLLM API (\d{3})\b/);
if (!match) return null;
return Number(match[1]);
}

export class LlmFailureGuard {
private pausedUntil = 0;

constructor(
private readonly cooldownMs = 10 * 60_000,
private readonly now = () => Date.now(),
) {}

canRun(): boolean {
return this.now() >= this.pausedUntil;
}

remainingMs(): number {
return Math.max(0, this.pausedUntil - this.now());
}

reset(): void {
this.pausedUntil = 0;
}

tripIfNeeded(error: unknown): boolean {
const status = extractLlmStatus(error);
if (status == null || RETRYABLE_STATUSES.has(status) || !PAUSING_STATUSES.has(status)) {
return false;
}
this.pausedUntil = Math.max(this.pausedUntil, this.now() + this.cooldownMs);
return true;
}
}
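
To show how a guard like this slots into the extraction path, here is a hedged usage sketch — `GuardLike`, `guardedExtract`, and `stubGuard` are illustrative names, not part of the plugin:

```typescript
// Illustrative wiring of a cooldown guard around the extraction LLM call.
// None of these names come from the plugin source; they only demonstrate
// the skip-on-cooldown / trip-on-permanent-error pattern.
interface GuardLike {
  canRun(): boolean;
  remainingMs(): number;
  tripIfNeeded(error: unknown): boolean;
}

async function guardedExtract(
  guard: GuardLike,
  callLlm: () => Promise<string>,
  log: (msg: string) => void,
): Promise<string | null> {
  if (!guard.canRun()) {
    // Still cooling down after a permanent failure → skip this turn entirely.
    log(`llm paused for ${guard.remainingMs()}ms, skipping extraction`);
    return null;
  }
  try {
    return await callLlm();
  } catch (err) {
    if (guard.tripIfNeeded(err)) {
      log(`permanent LLM failure, cooling down: ${err}`);
    }
    return null; // retryable errors simply fall through to the next turn
  }
}

// Minimal stub guard for the demo: trips on any "LLM API 4xx"-style error.
function stubGuard(cooldownMs: number, now: () => number): GuardLike {
  let pausedUntil = 0;
  return {
    canRun: () => now() >= pausedUntil,
    remainingMs: () => Math.max(0, pausedUntil - now()),
    tripIfNeeded(err: unknown): boolean {
      if (!/\bLLM API (400|401|403|404|422)\b/.test(String(err))) return false;
      pausedUntil = Math.max(pausedUntil, now() + cooldownMs);
      return true;
    },
  };
}
```

The design deliberately keeps retryable statuses (`429`/`5xx`) outside the cooldown: transient failures can be retried on the next turn, while permanent ones pause extraction for the whole cooldown window.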