From 04eb1c1cbc19d95f857088dbeb29f909c37c33ba Mon Sep 17 00:00:00 2001 From: Aleksey Shugaev Date: Tue, 26 May 2026 14:34:08 +0000 Subject: [PATCH] Stop SHUTDOWN_MODE middleware from killing the polling loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In production we observed that /help (and every other message) silently stopped getting any reply once SHUTDOWN_MODE was on. Root cause is in node_modules/telegraf/telegraf.js fetchUpdates: when handleUpdates rejects, the .catch branch flips polling.started = false and never recovers. Any throw escaping a middleware kills long-polling for the rest of the process lifetime. shutdownMode was the culprit. ctx.reply asserts ctx.chat and throws on updates that have no chat (my_chat_member, chat_join_request, …) — exactly the kind of updates that come in en masse after the farewell goes out and users start blocking the bot. One such update in the startup batch was enough to take all four bots off the air. Two narrow guards: * skip the update entirely when ctx.chat is missing * try/catch around ctx.reply so per-send failures (blocked-by-user, rate-limit, etc.) do not escape either Both leave the polling loop intact so /help keeps reaching users. 
--- src/middlewares/shutdownMode.test.ts | 56 ++++++++++++++++++++++++---- src/middlewares/shutdownMode.ts | 16 +++++++- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/middlewares/shutdownMode.test.ts b/src/middlewares/shutdownMode.test.ts index 4d65257..01e6619 100644 --- a/src/middlewares/shutdownMode.test.ts +++ b/src/middlewares/shutdownMode.test.ts @@ -1,14 +1,28 @@ +jest.mock('@/helpers/log', () => ({ + log: { error: jest.fn(), info: jest.fn() }, +})) + +import { log } from '@/helpers/log' import { SHUTDOWN_MESSAGE, shutdownMode } from '@/middlewares/shutdownMode' -const makeCtx = () => - ({ - reply: jest.fn().mockResolvedValue(undefined), - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } as any) +type Ctx = { + chat?: { id: number } + reply: jest.Mock +} + +const makeCtx = (overrides: Partial<Ctx> = {}): Ctx => ({ + chat: { id: 1 }, + reply: jest.fn().mockResolvedValue(undefined), + ...overrides, +}) describe('shutdownMode middleware', () => { const originalEnv = process.env + beforeEach(() => { + jest.clearAllMocks() + }) + afterEach(() => { process.env = { ...originalEnv } }) @@ -17,7 +31,8 @@ describe('shutdownMode middleware', () => { delete process.env.SHUTDOWN_MODE const ctx = makeCtx() const next = jest.fn() - await shutdownMode(ctx, next) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await shutdownMode(ctx as any, next) expect(next).toHaveBeenCalledTimes(1) expect(ctx.reply).not.toHaveBeenCalled() }) @@ -26,8 +41,35 @@ describe('shutdownMode middleware', () => { process.env.SHUTDOWN_MODE = 'true' const ctx = makeCtx() const next = jest.fn() - await shutdownMode(ctx, next) + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await shutdownMode(ctx as any, next) expect(next).not.toHaveBeenCalled() expect(ctx.reply).toHaveBeenCalledWith(SHUTDOWN_MESSAGE) }) + + // The Telegraf 3.x polling loop kills itself permanently on any middleware + // throw (telegraf.js fetchUpdates ->
handleUpdates rejection sets + // polling.started = false). The two cases below pin the two ways this + // middleware used to leak a rejection. + it('skips chatless updates instead of throwing on ctx.reply', async () => { + process.env.SHUTDOWN_MODE = 'true' + const ctx = makeCtx({ chat: undefined }) + const next = jest.fn() + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await expect(shutdownMode(ctx as any, next)).resolves.toBeUndefined() + expect(next).not.toHaveBeenCalled() + expect(ctx.reply).not.toHaveBeenCalled() + }) + + it('swallows reply failures (e.g. user blocked the bot)', async () => { + process.env.SHUTDOWN_MODE = 'true' + const ctx = makeCtx({ + reply: jest.fn().mockRejectedValue(new Error('Forbidden: bot blocked')), + }) + const next = jest.fn() + // eslint-disable-next-line @typescript-eslint/no-explicit-any + await expect(shutdownMode(ctx as any, next)).resolves.toBeUndefined() + expect(ctx.reply).toHaveBeenCalledWith(SHUTDOWN_MESSAGE) + expect(log.error).toHaveBeenCalled() + }) }) diff --git a/src/middlewares/shutdownMode.ts b/src/middlewares/shutdownMode.ts index 8889616..8f721ba 100644 --- a/src/middlewares/shutdownMode.ts +++ b/src/middlewares/shutdownMode.ts @@ -1,6 +1,7 @@ import { Context } from 'telegraf' import { isShutdownMode } from '@/helpers/isShutdownMode' +import { log } from '@/helpers/log' /* eslint-disable max-len */ export const SHUTDOWN_MESSAGE = `привет. я сделал гуся пять лет назад — тогда нигде не было удобных алертов по ценам. начал для себя, потом подтянулись люди. @@ -23,5 +24,18 @@ export const SHUTDOWN_MESSAGE = `привет. я сделал гуся пять // eslint-disable-next-line @typescript-eslint/no-explicit-any export async function shutdownMode(ctx: Context, next: () => any) { if (!isShutdownMode()) return next() - await ctx.reply(SHUTDOWN_MESSAGE) + // Skip updates without a chat (my_chat_member, chat_join_request, etc.). 
+ // ctx.reply asserts ctx.chat and throws otherwise; an unhandled throw inside + // a middleware kills Telegraf 3.x's polling loop permanently (see + // node_modules/telegraf/telegraf.js fetchUpdates: it flips polling.started + // to false on any handleUpdates rejection), so a single chatless update + // would stop the bot from ever reading another /help. + if (!ctx.chat) return + try { + await ctx.reply(SHUTDOWN_MESSAGE) + } catch (e) { + // Per-send failures (e.g. user blocked the bot, Telegram rate-limit) must + // not escape: same polling-loop kill switch as above. + log.error('[SHUTDOWN MODE] reply failed', e) + } }