From 655fbe002a4acb44fa9b51205660c57c3758c02e Mon Sep 17 00:00:00 2001
From: Alan Pena <alanpena91@gmail.com>
Date: Wed, 25 Mar 2026 00:17:46 -0400
Subject: [PATCH 1/3] fix: auto-reset telegram bridge session after lock
 failure

---
 scripts/telegram-bridge.js                 |  44 +++++--
 test/telegram-bridge-session-reset.test.js | 126 +++++++++++++++++++++
 2 files changed, 162 insertions(+), 8 deletions(-)
 create mode 100644 test/telegram-bridge-session-reset.test.js

diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index c51a5529a..c023c2d6b 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -140,20 +140,36 @@ function runAgentInSandbox(message, sessionId) {
       const response = responseLines.join("\n").trim();
 
       if (response) {
-        resolve(response);
+        resolve({ response, exitCode: code, stderr });
       } else if (code !== 0) {
-        resolve(`Agent exited with code ${code}. ${stderr.trim().slice(0, 500)}`);
+        resolve({ response: `Agent exited with code ${code}. ${stderr.trim().slice(0, 500)}`, exitCode: code, stderr });
       } else {
-        resolve("(no response)");
+        resolve({ response: "(no response)", exitCode: code, stderr });
       }
     });
 
     proc.on("error", (err) => {
-      resolve(`Error: ${err.message}`);
+      resolve({ response: `Error: ${err.message}`, exitCode: 1, stderr: err.message });
     });
   });
 }
 
+// ── Session lock detection ────────────────────────────────────────
+
+/**
+ * Returns true when an agent result indicates a session-file lock collision.
+ * Only matches the specific lock/corruption class of errors — not general failures.
+ * Exit code 255 alone is not sufficient (SSH uses it for connection errors too),
+ * so we require either an explicit lock message in stderr/response, or code 255
+ * combined with lock-related output.
+ */
+function isSessionLockFailure(result) {
+  const output = `${result.stderr || ""} ${result.response || ""}`;
+  if (output.includes("session file locked")) return true;
+  if (result.exitCode === 255 && /lock|session.*corrupt/i.test(output)) return true;
+  return false;
+}
+
 // ── Poll loop ─────────────────────────────────────────────────────
 
 async function poll() {
@@ -205,10 +221,20 @@ async function poll() {
         const typingInterval = setInterval(() => sendTyping(chatId), 4000);
 
         try {
-          const response = await runAgentInSandbox(msg.text, chatId);
+          const result = await runAgentInSandbox(msg.text, chatId);
           clearInterval(typingInterval);
-          console.log(`[${chatId}] agent: ${response.slice(0, 100)}...`);
-          await sendMessage(chatId, response, msg.message_id);
+
+          // Detect session lock failures and auto-reset to prevent corrupted
+          // context from persisting into subsequent messages (#833).
+          if (isSessionLockFailure(result)) {
+            activeSessions.delete(chatId);
+            console.error(`[${chatId}] session lock failure — session reset`);
+            await sendMessage(chatId, "⚠️ Session error detected — your session has been automatically reset. Please resend your message.", msg.message_id);
+            continue;
+          }
+
+          console.log(`[${chatId}] agent: ${result.response.slice(0, 100)}...`);
+          await sendMessage(chatId, result.response, msg.message_id);
         } catch (err) {
           clearInterval(typingInterval);
           await sendMessage(chatId, `Error: ${err.message}`, msg.message_id);
@@ -249,4 +275,6 @@ async function main() {
   poll();
 }
 
-main();
+if (require.main === module) {
+  main();
+}
diff --git a/test/telegram-bridge-session-reset.test.js b/test/telegram-bridge-session-reset.test.js
new file mode 100644
index 000000000..5eadedb24
--- /dev/null
+++ b/test/telegram-bridge-session-reset.test.js
@@ -0,0 +1,126 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Regression tests for #833: session lock failures must auto-reset the session
+// so the next message starts with clean context instead of resuming corrupted
+// conversation history.
+
+import fs from "node:fs";
+import path from "node:path";
+import { describe, it, expect } from "vitest";
+
+const BRIDGE_SRC = fs.readFileSync(
+  path.join(import.meta.dirname, "..", "scripts", "telegram-bridge.js"),
+  "utf-8",
+);
+
+// Extract and evaluate the isSessionLockFailure function from the source
+// so we test the real implementation without triggering the script's
+// top-level side effects (process.exit, openshell resolution, etc.).
+
+const fnMatch = BRIDGE_SRC.match(
+  /function isSessionLockFailure\(result\)\s*\{[\s\S]*?^\}/m,
+);
+if (!fnMatch) throw new Error("Could not extract isSessionLockFailure from telegram-bridge.js");
+const isSessionLockFailure = new Function(
+  `${fnMatch[0]}; return isSessionLockFailure;`,
+)();
+
+describe("isSessionLockFailure", () => {
+  it("detects exit code 255 as a lock failure when stderr mentions lock", () => {
+    const result = { response: "some output", exitCode: 255, stderr: "lock timeout" };
+    expect(isSessionLockFailure(result)).toBe(true);
+  });
+
+  it("does not flag exit code 255 alone without lock indicators", () => {
+    const result = { response: "some output", exitCode: 255, stderr: "ssh: connect to host openshell-nemoclaw port 22: Connection refused" };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
+  it("detects 'session file locked' in stderr regardless of exit code", () => {
+    const result = {
+      response: "",
+      exitCode: 1,
+      stderr: "Error: session file locked (timeout 10000ms)",
+    };
+    expect(isSessionLockFailure(result)).toBe(true);
+  });
+
+  it("detects 'session file locked' in response when stderr is empty", () => {
+    const result = {
+      response: "Agent exited with code 1. session file locked (timeout 10000ms)",
+      exitCode: 1,
+      stderr: "",
+    };
+    expect(isSessionLockFailure(result)).toBe(true);
+  });
+
+  it("detects exit code 255 with session corruption message", () => {
+    const result = {
+      response: "",
+      exitCode: 255,
+      stderr: "session file locked (timeout 10000ms)",
+    };
+    expect(isSessionLockFailure(result)).toBe(true);
+  });
+
+  it("does not flag a normal successful result", () => {
+    const result = { response: "Hello!", exitCode: 0, stderr: "" };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
+  it("does not flag a generic non-lock failure (e.g. OOM, timeout)", () => {
+    const result = {
+      response: "Agent exited with code 137. Killed",
+      exitCode: 137,
+      stderr: "Killed",
+    };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
+  it("does not flag a normal non-zero exit without lock indicators", () => {
+    const result = {
+      response: "Error: connection refused",
+      exitCode: 1,
+      stderr: "ssh: connect to host openshell-nemoclaw port 22: Connection refused",
+    };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
+  it("handles missing stderr and response gracefully", () => {
+    const result = { response: undefined, exitCode: 0, stderr: undefined };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+});
+
+describe("telegram-bridge session reset wiring", () => {
+  it("calls isSessionLockFailure in the poll handler", () => {
+    expect(BRIDGE_SRC).toContain("isSessionLockFailure(result)");
+  });
+
+  it("deletes activeSessions on lock failure", () => {
+    // The lock-failure branch must clear the session so the next message
+    // starts fresh instead of resuming corrupted context.
+    const lockBlock = BRIDGE_SRC.slice(
+      BRIDGE_SRC.indexOf("if (isSessionLockFailure(result))"),
+    );
+    expect(lockBlock).toContain("activeSessions.delete(chatId)");
+  });
+
+  it("does not delete activeSessions for normal responses", () => {
+    // Outside the lock-failure branch, the session should be preserved.
+    // Find the line after the lock-failure block that logs the normal response.
+    const normalPath = BRIDGE_SRC.match(
+      /console\.log\(`\[\$\{chatId\}\] agent:.*result\.response/,
+    );
+    expect(normalPath).not.toBeNull();
+  });
+
+  it("notifies the user when a session is auto-reset", () => {
+    const lockBlock = BRIDGE_SRC.slice(
+      BRIDGE_SRC.indexOf("if (isSessionLockFailure(result))"),
+    );
+    expect(lockBlock).toContain("sendMessage(chatId");
+    expect(lockBlock).toMatch(/session.*reset/i);
+  });
+});

From 6d93a8c61d63f059a6dfb18763e11bd3e16d9535 Mon Sep 17 00:00:00 2001
From: Alan Pena <alanpena91@gmail.com>
Date: Wed, 25 Mar 2026 00:34:56 -0400
Subject: [PATCH 2/3] fix: guard lock detection against false positives and
 rotate session ID on reset

- Only check result.response for lock text when exitCode !== 0, so a
  successful reply quoting the error string is not misidentified.
- Replace activeSessions.delete() with rotateSession() that assigns a
  new unique session suffix, ensuring the next runAgentInSandbox call
  uses a different --session-id and avoids reopening the corrupted
  remote session file.
- Update tests: add false-positive coverage, verify rotateSession
  wiring and getSessionId usage.
---
 scripts/telegram-bridge.js                 | 32 ++++++++++++++++------
 test/telegram-bridge-session-reset.test.js | 24 ++++++++--------
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index c023c2d6b..783223125 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -39,7 +39,19 @@ if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); }
 if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); }
 
 let offset = 0;
-const activeSessions = new Map(); // chatId → message history
+const activeSessions = new Map(); // chatId → session suffix
+let sessionCounter = 0;
+
+function getSessionId(chatId) {
+  if (!activeSessions.has(chatId)) {
+    activeSessions.set(chatId, `${chatId}-${++sessionCounter}`);
+  }
+  return activeSessions.get(chatId);
+}
+
+function rotateSession(chatId) {
+  activeSessions.set(chatId, `${chatId}-${++sessionCounter}`);
+}
 
 // ── Telegram API helpers ──────────────────────────────────────────
 
@@ -164,9 +176,13 @@ function runAgentInSandbox(message, sessionId) {
  * combined with lock-related output.
  */
 function isSessionLockFailure(result) {
-  const output = `${result.stderr || ""} ${result.response || ""}`;
-  if (output.includes("session file locked")) return true;
-  if (result.exitCode === 255 && /lock|session.*corrupt/i.test(output)) return true;
+  const stderr = result.stderr || "";
+  // Always check stderr regardless of exit code.
+  if (stderr.includes("session file locked")) return true;
+  // Only check response text when the process actually failed — a successful
+  // reply that quotes the error string (e.g. explaining the error) is not a lock failure.
+  if (result.exitCode !== 0 && (result.response || "").includes("session file locked")) return true;
+  if (result.exitCode === 255 && /lock|session.*corrupt/i.test(`${stderr} ${result.response || ""}`)) return true;
   return false;
 }
 
@@ -209,7 +225,7 @@ async function poll() {
 
         // Handle /reset
         if (msg.text === "/reset") {
-          activeSessions.delete(chatId);
+          rotateSession(chatId);
           await sendMessage(chatId, "Session reset.", msg.message_id);
           continue;
         }
@@ -221,14 +237,14 @@ async function poll() {
         const typingInterval = setInterval(() => sendTyping(chatId), 4000);
 
         try {
-          const result = await runAgentInSandbox(msg.text, chatId);
+          const result = await runAgentInSandbox(msg.text, getSessionId(chatId));
           clearInterval(typingInterval);
 
           // Detect session lock failures and auto-reset to prevent corrupted
           // context from persisting into subsequent messages (#833).
           if (isSessionLockFailure(result)) {
-            activeSessions.delete(chatId);
-            console.error(`[${chatId}] session lock failure — session reset`);
+            rotateSession(chatId);
+            console.error(`[${chatId}] session lock failure — session rotated`);
             await sendMessage(chatId, "⚠️ Session error detected — your session has been automatically reset. Please resend your message.", msg.message_id);
             continue;
           }
diff --git a/test/telegram-bridge-session-reset.test.js b/test/telegram-bridge-session-reset.test.js
index 5eadedb24..af1e64295 100644
--- a/test/telegram-bridge-session-reset.test.js
+++ b/test/telegram-bridge-session-reset.test.js
@@ -69,6 +69,15 @@ describe("isSessionLockFailure", () => {
     expect(isSessionLockFailure(result)).toBe(false);
   });
 
+  it("does not flag a successful reply that quotes the error text", () => {
+    const result = {
+      response: 'The error "session file locked" means another process is using the file.',
+      exitCode: 0,
+      stderr: "",
+    };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
   it("does not flag a generic non-lock failure (e.g. OOM, timeout)", () => {
     const result = {
       response: "Agent exited with code 137. Killed",
@@ -98,22 +107,15 @@ describe("telegram-bridge session reset wiring", () => {
     expect(BRIDGE_SRC).toContain("isSessionLockFailure(result)");
   });
 
-  it("deletes activeSessions on lock failure", () => {
-    // The lock-failure branch must clear the session so the next message
-    // starts fresh instead of resuming corrupted context.
+  it("rotates session on lock failure instead of just deleting", () => {
     const lockBlock = BRIDGE_SRC.slice(
       BRIDGE_SRC.indexOf("if (isSessionLockFailure(result))"),
     );
-    expect(lockBlock).toContain("activeSessions.delete(chatId)");
+    expect(lockBlock).toContain("rotateSession(chatId)");
   });
 
-  it("does not delete activeSessions for normal responses", () => {
-    // Outside the lock-failure branch, the session should be preserved.
-    // Find the line after the lock-failure block that logs the normal response.
-    const normalPath = BRIDGE_SRC.match(
-      /console\.log\(`\[\$\{chatId\}\] agent:.*result\.response/,
-    );
-    expect(normalPath).not.toBeNull();
+  it("uses getSessionId when calling runAgentInSandbox", () => {
+    expect(BRIDGE_SRC).toContain("runAgentInSandbox(msg.text, getSessionId(chatId))");
   });
 
   it("notifies the user when a session is auto-reset", () => {

From a91573ebae529e77c250e9b7e8fc09de48a36eda Mon Sep 17 00:00:00 2001
From: Alan Pena <alanpena91@gmail.com>
Date: Wed, 25 Mar 2026 00:46:16 -0400
Subject: [PATCH 3/3] fix: use collision-resistant session IDs, tighten lock
 regex, guard top-level init

- Replace deterministic chatId-counter with crypto.randomUUID() so
  restarted bridges never recreate abandoned session IDs.
- Tighten exit-255 fallback regex to require session-specific phrasing
  (session.*lock, lock.*session, session.*corrupt) instead of bare 'lock'.
- Move env/OpenShell validation into init(), called only when
  require.main === module, so requiring the file in tests is safe.
- Add test for bare 'lock' false positive on exit 255.
---
 scripts/telegram-bridge.js                 | 49 +++++++++++++---------
 test/telegram-bridge-session-reset.test.js |  9 +++-
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index 783223125..9f1b0929b 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -16,41 +16,51 @@
  *   ALLOWED_CHAT_IDS    — comma-separated Telegram chat IDs to accept (optional, accepts all if unset)
  */
 
+const crypto = require("crypto");
 const https = require("https");
 const { execFileSync, spawn } = require("child_process");
 const { resolveOpenshell } = require("../bin/lib/resolve-openshell");
 const { shellQuote, validateName } = require("../bin/lib/runner");
 
-const OPENSHELL = resolveOpenshell();
-if (!OPENSHELL) {
-  console.error("openshell not found on PATH or in common locations");
-  process.exit(1);
-}
+// Lazy-initialized by init() so requiring the module in tests does not
+// trigger process.exit from missing env vars or openshell resolution.
+let OPENSHELL;
+let TOKEN;
+let API_KEY;
+let SANDBOX;
+let ALLOWED_CHATS;
+
+function init() {
+  OPENSHELL = resolveOpenshell();
+  if (!OPENSHELL) {
+    console.error("openshell not found on PATH or in common locations");
+    process.exit(1);
+  }
 
-const TOKEN = process.env.TELEGRAM_BOT_TOKEN;
-const API_KEY = process.env.NVIDIA_API_KEY;
-const SANDBOX = process.env.SANDBOX_NAME || "nemoclaw";
-try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); }
-const ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS
-  ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim())
-  : null;
+  TOKEN = process.env.TELEGRAM_BOT_TOKEN;
+  API_KEY = process.env.NVIDIA_API_KEY;
+  SANDBOX = process.env.SANDBOX_NAME || "nemoclaw";
+  try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); }
+  ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS
+    ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim())
+    : null;
 
-if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); }
-if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); }
+  if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); }
+  if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); }
+}
 
 let offset = 0;
-const activeSessions = new Map(); // chatId → session suffix
-let sessionCounter = 0;
+const activeSessions = new Map(); // chatId → session UUID suffix
 
 function getSessionId(chatId) {
   if (!activeSessions.has(chatId)) {
-    activeSessions.set(chatId, `${chatId}-${++sessionCounter}`);
+    activeSessions.set(chatId, `${chatId}-${crypto.randomUUID()}`);
   }
   return activeSessions.get(chatId);
 }
 
 function rotateSession(chatId) {
-  activeSessions.set(chatId, `${chatId}-${++sessionCounter}`);
+  activeSessions.set(chatId, `${chatId}-${crypto.randomUUID()}`);
 }
 
 // ── Telegram API helpers ──────────────────────────────────────────
@@ -182,7 +192,7 @@ function isSessionLockFailure(result) {
   // Only check response text when the process actually failed — a successful
   // reply that quotes the error string (e.g. explaining the error) is not a lock failure.
   if (result.exitCode !== 0 && (result.response || "").includes("session file locked")) return true;
-  if (result.exitCode === 255 && /lock|session.*corrupt/i.test(`${stderr} ${result.response || ""}`)) return true;
+  if (result.exitCode === 255 && /(session.*lock|lock.*session|session.*corrupt|corrupt.*session)/i.test(`${stderr} ${result.response || ""}`)) return true;
   return false;
 }
 
@@ -292,5 +302,6 @@ async function main() {
 }
 
 if (require.main === module) {
+  init();
   main();
 }
diff --git a/test/telegram-bridge-session-reset.test.js b/test/telegram-bridge-session-reset.test.js
index af1e64295..1b7f2dfec 100644
--- a/test/telegram-bridge-session-reset.test.js
+++ b/test/telegram-bridge-session-reset.test.js
@@ -27,11 +27,16 @@ const isSessionLockFailure = new Function(
 )();
 
 describe("isSessionLockFailure", () => {
-  it("detects exit code 255 as a lock failure when stderr mentions lock", () => {
-    const result = { response: "some output", exitCode: 255, stderr: "lock timeout" };
+  it("detects exit code 255 as a lock failure when stderr mentions session lock", () => {
+    const result = { response: "some output", exitCode: 255, stderr: "session lock timeout" };
     expect(isSessionLockFailure(result)).toBe(true);
   });
 
+  it("does not flag exit code 255 with bare 'lock' unrelated to session", () => {
+    const result = { response: "some output", exitCode: 255, stderr: "lock timeout on resource" };
+    expect(isSessionLockFailure(result)).toBe(false);
+  });
+
   it("does not flag exit code 255 alone without lock indicators", () => {
     const result = { response: "some output", exitCode: 255, stderr: "ssh: connect to host openshell-nemoclaw port 22: Connection refused" };
     expect(isSessionLockFailure(result)).toBe(false);