diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js index c51a5529a..9f1b0929b 100755 --- a/scripts/telegram-bridge.js +++ b/scripts/telegram-bridge.js @@ -16,30 +16,52 @@ * ALLOWED_CHAT_IDS — comma-separated Telegram chat IDs to accept (optional, accepts all if unset) */ +const crypto = require("crypto"); const https = require("https"); const { execFileSync, spawn } = require("child_process"); const { resolveOpenshell } = require("../bin/lib/resolve-openshell"); const { shellQuote, validateName } = require("../bin/lib/runner"); -const OPENSHELL = resolveOpenshell(); -if (!OPENSHELL) { - console.error("openshell not found on PATH or in common locations"); - process.exit(1); -} +// Lazy-initialized by init() so requiring the module in tests does not +// trigger process.exit from missing env vars or openshell resolution. +let OPENSHELL; +let TOKEN; +let API_KEY; +let SANDBOX; +let ALLOWED_CHATS; + +function init() { + OPENSHELL = resolveOpenshell(); + if (!OPENSHELL) { + console.error("openshell not found on PATH or in common locations"); + process.exit(1); + } -const TOKEN = process.env.TELEGRAM_BOT_TOKEN; -const API_KEY = process.env.NVIDIA_API_KEY; -const SANDBOX = process.env.SANDBOX_NAME || "nemoclaw"; -try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); } -const ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS - ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim()) - : null; + TOKEN = process.env.TELEGRAM_BOT_TOKEN; + API_KEY = process.env.NVIDIA_API_KEY; + SANDBOX = process.env.SANDBOX_NAME || "nemoclaw"; + try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); } + ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS + ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim()) + : null; -if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); } -if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); } + if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); } + if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); } +} let offset = 0; -const activeSessions = new Map(); // chatId → message history +const activeSessions = new Map(); // chatId → session UUID suffix + +function getSessionId(chatId) { + if (!activeSessions.has(chatId)) { + activeSessions.set(chatId, `${chatId}-${crypto.randomUUID()}`); + } + return activeSessions.get(chatId); +} + +function rotateSession(chatId) { + activeSessions.set(chatId, `${chatId}-${crypto.randomUUID()}`); +} // ── Telegram API helpers ────────────────────────────────────────── @@ -140,20 +162,40 @@ function runAgentInSandbox(message, sessionId) { const response = responseLines.join("\n").trim(); if (response) { - resolve(response); + resolve({ response, exitCode: code, stderr }); } else if (code !== 0) { - resolve(`Agent exited with code ${code}. ${stderr.trim().slice(0, 500)}`); + resolve({ response: `Agent exited with code ${code}. ${stderr.trim().slice(0, 500)}`, exitCode: code, stderr }); } else { - resolve("(no response)"); + resolve({ response: "(no response)", exitCode: code, stderr }); } }); proc.on("error", (err) => { - resolve(`Error: ${err.message}`); + resolve({ response: `Error: ${err.message}`, exitCode: 1, stderr: err.message }); }); }); } +// ── Session lock detection ──────────────────────────────────────── + +/** + * Returns true when an agent result indicates a session-file lock collision. + * Only matches the specific lock/corruption class of errors — not general failures. + * Exit code 255 alone is not sufficient (SSH uses it for connection errors too), + * so we require either an explicit lock message in stderr/response, or code 255 + * combined with lock-related output. + */ +function isSessionLockFailure(result) { + const stderr = result.stderr || ""; + // Always check stderr regardless of exit code. + if (stderr.includes("session file locked")) return true; + // Only check response text when the process actually failed — a successful + // reply that quotes the error string (e.g. explaining the error) is not a lock failure. + if (result.exitCode !== 0 && (result.response || "").includes("session file locked")) return true; + if (result.exitCode === 255 && /(session.*lock|lock.*session|session.*corrupt|corrupt.*session)/i.test(`${stderr} ${result.response || ""}`)) return true; + return false; +} + // ── Poll loop ───────────────────────────────────────────────────── async function poll() { @@ -193,7 +235,7 @@ async function poll() { // Handle /reset if (msg.text === "/reset") { - activeSessions.delete(chatId); + rotateSession(chatId); await sendMessage(chatId, "Session reset.", msg.message_id); continue; } @@ -205,10 +247,20 @@ async function poll() { const typingInterval = setInterval(() => sendTyping(chatId), 4000); try { - const response = await runAgentInSandbox(msg.text, chatId); + const result = await runAgentInSandbox(msg.text, getSessionId(chatId)); clearInterval(typingInterval); - console.log(`[${chatId}] agent: ${response.slice(0, 100)}...`); - await sendMessage(chatId, response, msg.message_id); + + // Detect session lock failures and auto-reset to prevent corrupted + // context from persisting into subsequent messages (#833). + if (isSessionLockFailure(result)) { + rotateSession(chatId); + console.error(`[${chatId}] session lock failure — session rotated`); + await sendMessage(chatId, "⚠️ Session error detected — your session has been automatically reset. Please resend your message.", msg.message_id); + continue; + } + + console.log(`[${chatId}] agent: ${result.response.slice(0, 100)}...`); + await sendMessage(chatId, result.response, msg.message_id); } catch (err) { clearInterval(typingInterval); await sendMessage(chatId, `Error: ${err.message}`, msg.message_id); @@ -249,4 +301,7 @@ async function main() { poll(); } -main(); +if (require.main === module) { + init(); + main(); +} diff --git a/test/telegram-bridge-session-reset.test.js b/test/telegram-bridge-session-reset.test.js new file mode 100644 index 000000000..1b7f2dfec --- /dev/null +++ b/test/telegram-bridge-session-reset.test.js @@ -0,0 +1,133 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Regression tests for #833: session lock failures must auto-reset the session +// so the next message starts with clean context instead of resuming corrupted +// conversation history. + +import fs from "node:fs"; +import path from "node:path"; +import { describe, it, expect } from "vitest"; + +const BRIDGE_SRC = fs.readFileSync( + path.join(import.meta.dirname, "..", "scripts", "telegram-bridge.js"), + "utf-8", +); + +// Extract and evaluate the isSessionLockFailure function from the source +// so we test the real implementation without triggering the script's +// top-level side effects (process.exit, openshell resolution, etc.). + +const fnMatch = BRIDGE_SRC.match( + /function isSessionLockFailure\(result\)\s*\{[\s\S]*?^\}/m, +); +if (!fnMatch) throw new Error("Could not extract isSessionLockFailure from telegram-bridge.js"); +const isSessionLockFailure = new Function( + `${fnMatch[0]}; return isSessionLockFailure;`, +)(); + +describe("isSessionLockFailure", () => { + it("detects exit code 255 as a lock failure when stderr mentions session lock", () => { + const result = { response: "some output", exitCode: 255, stderr: "session lock timeout" }; + expect(isSessionLockFailure(result)).toBe(true); + }); + + it("does not flag exit code 255 with bare 'lock' unrelated to session", () => { + const result = { response: "some output", exitCode: 255, stderr: "lock timeout on resource" }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("does not flag exit code 255 alone without lock indicators", () => { + const result = { response: "some output", exitCode: 255, stderr: "ssh: connect to host openshell-nemoclaw port 22: Connection refused" }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("detects 'session file locked' in stderr regardless of exit code", () => { + const result = { + response: "", + exitCode: 1, + stderr: "Error: session file locked (timeout 10000ms)", + }; + expect(isSessionLockFailure(result)).toBe(true); + }); + + it("detects 'session file locked' in response when stderr is empty", () => { + const result = { + response: "Agent exited with code 1. session file locked (timeout 10000ms)", + exitCode: 1, + stderr: "", + }; + expect(isSessionLockFailure(result)).toBe(true); + }); + + it("detects exit code 255 with session corruption message", () => { + const result = { + response: "", + exitCode: 255, + stderr: "session file locked (timeout 10000ms)", + }; + expect(isSessionLockFailure(result)).toBe(true); + }); + + it("does not flag a normal successful result", () => { + const result = { response: "Hello!", exitCode: 0, stderr: "" }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("does not flag a successful reply that quotes the error text", () => { + const result = { + response: 'The error "session file locked" means another process is using the file.', + exitCode: 0, + stderr: "", + }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("does not flag a generic non-lock failure (e.g. OOM, timeout)", () => { + const result = { + response: "Agent exited with code 137. Killed", + exitCode: 137, + stderr: "Killed", + }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("does not flag a normal non-zero exit without lock indicators", () => { + const result = { + response: "Error: connection refused", + exitCode: 1, + stderr: "ssh: connect to host openshell-nemoclaw port 22: Connection refused", + }; + expect(isSessionLockFailure(result)).toBe(false); + }); + + it("handles missing stderr and response gracefully", () => { + const result = { response: undefined, exitCode: 0, stderr: undefined }; + expect(isSessionLockFailure(result)).toBe(false); + }); +}); + +describe("telegram-bridge session reset wiring", () => { + it("calls isSessionLockFailure in the poll handler", () => { + expect(BRIDGE_SRC).toContain("isSessionLockFailure(result)"); + }); + + it("rotates session on lock failure instead of just deleting", () => { + const lockBlock = BRIDGE_SRC.slice( + BRIDGE_SRC.indexOf("if (isSessionLockFailure(result))"), + ); + expect(lockBlock).toContain("rotateSession(chatId)"); + }); + + it("uses getSessionId when calling runAgentInSandbox", () => { + expect(BRIDGE_SRC).toContain("runAgentInSandbox(msg.text, getSessionId(chatId))"); + }); + + it("notifies the user when a session is auto-reset", () => { + const lockBlock = BRIDGE_SRC.slice( + BRIDGE_SRC.indexOf("if (isSessionLockFailure(result))"), + ); + expect(lockBlock).toContain("sendMessage(chatId"); + expect(lockBlock).toMatch(/session.*reset/i); + }); +});