From faa7ada0f84f3fced49e2cc65be53e10234b4a83 Mon Sep 17 00:00:00 2001 From: Danilo Campana Fuchs Date: Thu, 21 May 2026 12:59:40 -0300 Subject: [PATCH 1/2] fix: register SIGINT/SIGTERM handlers in HTTP transport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HTTP transport had no signal handlers at all, so SIGTERM from orchestrators (Docker/Kubernetes/ECS) either bypassed connector teardown or hung the process until the orchestrator sent SIGKILL (exit 137). In either case the running server skipped clean disconnects for DB pools, SSH tunnels, and IAM refresh timers in the ConnectorManager. Extract the existing STDIO shutdown into a transport-agnostic helper that closes the active transport (HTTP server or stdio), calls connectorManager.disconnect(), stops the config watcher, and falls back to a forced exit after 25s so a stuck cleanup can't outlast typical orchestrator stop-timeouts. Wire it up from both transport branches. Add an integration test that asserts the server exits with code 0 within a few seconds of SIGTERM or SIGINT in HTTP mode — without the fix the process is killed by signal (exit code null), with the fix the handler runs and exits cleanly. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../graceful-shutdown-integration.test.ts | 92 +++++++++++++++++++ src/server.ts | 56 +++++++---- 2 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 src/__tests__/graceful-shutdown-integration.test.ts diff --git a/src/__tests__/graceful-shutdown-integration.test.ts b/src/__tests__/graceful-shutdown-integration.test.ts new file mode 100644 index 0000000..c413ad6 --- /dev/null +++ b/src/__tests__/graceful-shutdown-integration.test.ts @@ -0,0 +1,92 @@ +import { describe, it, expect } from 'vitest'; +import { spawn, ChildProcess } from 'child_process'; +import fs from 'fs'; +import path from 'path'; +import os from 'os'; + +describe('Graceful shutdown', () => { + const testPort = 3099; + + async function waitForServerReady(baseUrl: string, timeoutMs = 20_000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const response = await fetch(`${baseUrl}/healthz`); + if (response.ok) return; + } catch { + // not ready yet + } + await new Promise((r) => setTimeout(r, 500)); + } + throw new Error('Server did not become ready in time'); + } + + function startServer(): { proc: ChildProcess; dbPath: string } { + const dbPath = path.join( + os.tmpdir(), + `graceful_shutdown_${Date.now()}_${Math.random().toString(36).slice(2, 11)}.db` + ); + // Spawn tsx directly (no `pnpm dev:backend` wrapper) so SIGTERM/SIGINT + // reach the Node process unmediated by pnpm / concurrently. + const proc = spawn( + 'node', + ['--import', 'tsx/esm', 'src/index.ts', '--transport=http'], + { + env: { + ...process.env, + DSN: `sqlite://${dbPath}`, + PORT: testPort.toString(), + NODE_ENV: 'test', + }, + stdio: 'pipe', + } + ); + proc.stderr?.on('data', (d) => process.stderr.write(`[server] ${d}`)); + return { proc, dbPath }; + } + + async function waitForExit( + proc: ChildProcess, + timeoutMs: number + ): Promise<{ code: number | null; signal: NodeJS.Signals | null; timedOut: boolean }> { + return new Promise((resolve) => { + const timer = setTimeout(() => { + resolve({ code: null, signal: null, timedOut: true }); + }, timeoutMs); + proc.once('exit', (code, signal) => { + clearTimeout(timer); + resolve({ code, signal, timedOut: false }); + }); + }); + } + + it('exits with code 0 within a few seconds of SIGTERM in HTTP mode', async () => { + const { proc, dbPath } = startServer(); + try { + await waitForServerReady(`http://localhost:${testPort}`); + proc.kill('SIGTERM'); + const result = await waitForExit(proc, 10_000); + expect(result.timedOut).toBe(false); + expect(result.code).toBe(0); + expect(result.signal).toBeNull(); + } finally { + if (!proc.killed) proc.kill('SIGKILL'); + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + } + }, 60_000); + + it('exits with code 0 within a few seconds of SIGINT in HTTP mode', async () => { + const { proc, dbPath } = startServer(); + try { + await waitForServerReady(`http://localhost:${testPort}`); + proc.kill('SIGINT'); + const result = await waitForExit(proc, 10_000); + expect(result.timedOut).toBe(false); + expect(result.code).toBe(0); + expect(result.signal).toBeNull(); + } finally { + if (!proc.killed) proc.kill('SIGKILL'); + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + } + }, 60_000); +}); diff --git a/src/server.ts b/src/server.ts index 4319e3a..9d52051 100644 --- a/src/server.ts +++ b/src/server.ts @@ -158,8 +158,35 @@ See documentation for more details on configuring database connections. ); console.error(generateStartupTable(sourceDisplayInfos)); - // Clean up config watcher when the process is exiting (covers both transports) - process.on("exit", () => { stopConfigWatcher?.(); }); + let isShuttingDown = false; + const installShutdownHandlers = (closeTransport: () => Promise) => { + const shutdown = async (signal: string) => { + if (isShuttingDown) return; + isShuttingDown = true; + console.error(`Received ${signal}, shutting down...`); + + const forceExit = setTimeout(() => { + console.error("Graceful shutdown timed out after 25s, forcing exit"); + process.exit(1); + }, 25_000); + forceExit.unref(); + + try { + await closeTransport(); + await connectorManager.disconnect(); + stopConfigWatcher?.(); + } catch (err) { + console.error("Error during shutdown:", err); + } finally { + clearTimeout(forceExit); + process.exit(0); + } + }; + + process.on("SIGINT", () => shutdown("SIGINT")); + process.on("SIGTERM", () => shutdown("SIGTERM")); + return shutdown; + }; // Set up transport-specific server if (transportData.type === "http") { @@ -258,7 +285,7 @@ See documentation for more details on configuring database connections. } // Start the HTTP server - app.listen(port, '0.0.0.0', () => { + const httpServer = app.listen(port, '0.0.0.0', () => { // In development mode, suggest using the Vite dev server for hot reloading if (process.env.NODE_ENV === 'development') { console.error('Development mode detected!'); @@ -270,6 +297,13 @@ See documentation for more details on configuring database connections. } console.error(`MCP server endpoint at http://localhost:${port}/mcp`); }); + + installShutdownHandlers( + () => + new Promise((resolve, reject) => + httpServer.close((err) => (err ? reject(err) : resolve())) + ) + ); } else { // STDIO transport: Pure MCP-over-stdio, no HTTP server const server = createServer(); @@ -277,24 +311,12 @@ See documentation for more details on configuring database connections. await server.connect(transport); console.error("MCP server running on stdio"); - let isShuttingDown = false; - const shutdown = async () => { - if (isShuttingDown) return; - isShuttingDown = true; - console.error("Shutting down..."); - await transport.close(); - await connectorManager.disconnect(); - process.exit(0); - }; - - // Listen for SIGINT/SIGTERM to gracefully shut down - process.on("SIGINT", shutdown); - process.on("SIGTERM", shutdown); + const shutdown = installShutdownHandlers(() => transport.close()); // Exit when stdin closes (parent process terminated). // On Windows, SIGINT/SIGTERM are not reliably sent when the parent // process exits — detecting stdin EOF is the portable way to handle this. - process.stdin.on("end", shutdown); + process.stdin.on("end", () => shutdown("stdin EOF")); } } catch (err) { console.error("Fatal error:", err); From 5297a3832322c83dbb6361824ba8a7ac2ffddf97 Mon Sep 17 00:00:00 2001 From: Danilo Campana Fuchs Date: Thu, 21 May 2026 15:42:17 -0300 Subject: [PATCH 2/2] review: address Copilot feedback on PR #318 - Make each shutdown step best-effort so a failing transport close still runs connector disconnect and config-watcher cleanup. - Track shutdown errors and exit non-zero on failure so orchestrators / CI can observe partial cleanup. - Test: pick an ephemeral port at runtime instead of a fixed 3099 to avoid collisions in parallel runs or busy CI runners. - Test: use proc.exitCode / signalCode instead of proc.killed to decide whether to force-kill the child; proc.killed flips true on the first signal even if the process is still alive, which could leak it. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../graceful-shutdown-integration.test.ts | 51 ++++++++++++++----- src/server.ts | 26 ++++++---- 2 files changed, 55 insertions(+), 22 deletions(-) diff --git a/src/__tests__/graceful-shutdown-integration.test.ts b/src/__tests__/graceful-shutdown-integration.test.ts index c413ad6..17de1a4 100644 --- a/src/__tests__/graceful-shutdown-integration.test.ts +++ b/src/__tests__/graceful-shutdown-integration.test.ts @@ -1,11 +1,28 @@ import { describe, it, expect } from 'vitest'; import { spawn, ChildProcess } from 'child_process'; +import { createServer } from 'net'; import fs from 'fs'; import path from 'path'; import os from 'os'; describe('Graceful shutdown', () => { - const testPort = 3099; + async function getEphemeralPort(): Promise { + return new Promise((resolve, reject) => { + const srv = createServer(); + srv.unref(); + srv.on('error', reject); + srv.listen(0, '127.0.0.1', () => { + const addr = srv.address(); + if (addr && typeof addr === 'object') { + const port = addr.port; + srv.close(() => resolve(port)); + } else { + srv.close(); + reject(new Error('Failed to acquire ephemeral port')); + } + }); + }); + } async function waitForServerReady(baseUrl: string, timeoutMs = 20_000): Promise { const deadline = Date.now() + timeoutMs; @@ -21,7 +38,8 @@ describe('Graceful shutdown', () => { throw new Error('Server did not become ready in time'); } - function startServer(): { proc: ChildProcess; dbPath: string } { + async function startServer(): Promise<{ proc: ChildProcess; dbPath: string; port: number }> { + const port = await getEphemeralPort(); const dbPath = path.join( os.tmpdir(), `graceful_shutdown_${Date.now()}_${Math.random().toString(36).slice(2, 11)}.db` @@ -35,14 +53,14 @@ describe('Graceful shutdown', () => { env: { ...process.env, DSN: `sqlite://${dbPath}`, - PORT: testPort.toString(), + PORT: port.toString(), NODE_ENV: 'test', }, stdio: 'pipe', } ); proc.stderr?.on('data', (d) => process.stderr.write(`[server] ${d}`)); - return { proc, dbPath }; + return { proc, dbPath, port }; } async function waitForExit( @@ -60,33 +78,42 @@ describe('Graceful shutdown', () => { }); } + async function cleanup(proc: ChildProcess, dbPath: string): Promise { + // `proc.killed` flips true as soon as we send any signal, even before + // the child has exited — check `exitCode`/`signalCode` to know whether + // the process is actually gone, and force-kill otherwise to avoid leaks. + if (proc.exitCode === null && proc.signalCode === null) { + proc.kill('SIGKILL'); + await waitForExit(proc, 5_000); + } + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + } + it('exits with code 0 within a few seconds of SIGTERM in HTTP mode', async () => { - const { proc, dbPath } = startServer(); + const { proc, dbPath, port } = await startServer(); try { - await waitForServerReady(`http://localhost:${testPort}`); + await waitForServerReady(`http://localhost:${port}`); proc.kill('SIGTERM'); const result = await waitForExit(proc, 10_000); expect(result.timedOut).toBe(false); expect(result.code).toBe(0); expect(result.signal).toBeNull(); } finally { - if (!proc.killed) proc.kill('SIGKILL'); - if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + await cleanup(proc, dbPath); } }, 60_000); it('exits with code 0 within a few seconds of SIGINT in HTTP mode', async () => { - const { proc, dbPath } = startServer(); + const { proc, dbPath, port } = await startServer(); try { - await waitForServerReady(`http://localhost:${testPort}`); + await waitForServerReady(`http://localhost:${port}`); proc.kill('SIGINT'); const result = await waitForExit(proc, 10_000); expect(result.timedOut).toBe(false); expect(result.code).toBe(0); expect(result.signal).toBeNull(); } finally { - if (!proc.killed) proc.kill('SIGKILL'); - if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + await cleanup(proc, dbPath); } }, 60_000); }); diff --git a/src/server.ts b/src/server.ts index 9d52051..24cb3a6 100644 --- a/src/server.ts +++ b/src/server.ts @@ -171,16 +171,22 @@ See documentation for more details on configuring database connections. }, 25_000); forceExit.unref(); - try { - await closeTransport(); - await connectorManager.disconnect(); - stopConfigWatcher?.(); - } catch (err) { - console.error("Error during shutdown:", err); - } finally { - clearTimeout(forceExit); - process.exit(0); - } + let hadError = false; + const step = async (label: string, fn: () => unknown | Promise) => { + try { + await fn(); + } catch (err) { + hadError = true; + console.error(`Error during shutdown (${label}):`, err); + } + }; + + await step("close transport", closeTransport); + await step("disconnect connectors", () => connectorManager.disconnect()); + await step("stop config watcher", () => stopConfigWatcher?.()); + + clearTimeout(forceExit); + process.exit(hadError ? 1 : 0); }; process.on("SIGINT", () => shutdown("SIGINT"));