diff --git a/.env.example b/.env.example index 1967b3e..4ce48a7 100644 --- a/.env.example +++ b/.env.example @@ -1 +1,3 @@ -TOKEN="YOUR BOT TOKEN HERE" \ No newline at end of file +TOKEN="YOUR BOT TOKEN HERE" +HEALTHCHECK_HOST="127.0.0.1" +HEALTHCHECK_PORT="8080" diff --git a/Dockerfile b/Dockerfile index 839ee5c..56b2614 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm AS python-base ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 +ENV HEALTHCHECK_HOST=127.0.0.1 +ENV HEALTHCHECK_PORT=8080 RUN pip install uv @@ -19,6 +21,8 @@ ENV PYTHONUNBUFFERED=1 WORKDIR /app +RUN apt-get update && apt-get install -y --no-install-recommends wget && rm -rf /var/lib/apt/lists/* + RUN adduser -u 8192 --disabled-password --gecos "" appuser && chown -R appuser /app COPY --from=python-base --chown=appuser /app/requirements.txt ./ @@ -28,4 +32,8 @@ RUN pip install -r requirements.txt COPY src/ ./src USER appuser +EXPOSE 8080 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["sh", "-c", "wget -q -T 3 -O /dev/null \"http://${HEALTHCHECK_HOST}:${HEALTHCHECK_PORT}/health\""] + CMD ["python", "-m", "src"] diff --git a/README.md b/README.md index 2f214e8..b1e7150 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,22 @@ Versa is a simple utility Discord bot, with the main goal of being open source a #### ⚠ **Further support with self-hosting will not be provided.** ⚠ +### Healthcheck endpoint + +The bot exposes an HTTP healthcheck endpoint for deployment platforms (such as Coolify). 
+ +- Method/path: `GET /health` +- Docker default bind: `127.0.0.1:8080` (set via `ENV` in `Dockerfile`) +- Config via env vars: + - `HEALTHCHECK_HOST` + - `HEALTHCHECK_PORT` + - Leave `HEALTHCHECK_HOST` unset or set to an empty string to disable the healthcheck server + +The endpoint returns: +- `200` when DB is responsive, Discord is connected, Discord shard heartbeat latency is healthy, and no global Discord rate-limit is active +- `503` when any check is failing + # License This project is licensed under AGPL-3.0. Forks and redistributions must remain open-source. See the LICENSE file for -further info \ No newline at end of file +further info diff --git a/src/__main__.py b/src/__main__.py index b02a597..c0dea3b 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -4,10 +4,10 @@ import discord from src import log_setup +from src.cogs.healthcheck import HEALTHCHECK_COG_NAME, HealthcheckCog +from src.config import TOKEN from src.database import init_db, shutdown_db -from .config import TOKEN - log_setup.setup_logging(logging.INFO) logger = logging.getLogger(__name__) @@ -40,6 +40,14 @@ async def start() -> None: except Exception as e: # noqa: BLE001 original_exc = e finally: + healthcheck_stop_exc = None + healthcheck_cog = bot.get_cog(HEALTHCHECK_COG_NAME) + try: + if isinstance(healthcheck_cog, HealthcheckCog): + await healthcheck_cog.stop_server() + except Exception as healthcheck_exc: # noqa: BLE001 + healthcheck_stop_exc = healthcheck_exc + try: await shutdown_db() except Exception as e2: @@ -47,7 +55,17 @@ async def start() -> None: msg = "Multiple errors happened when starting the bot" raise ExceptionGroup(msg, [original_exc, e2]) from None + if healthcheck_stop_exc: + msg = "Multiple errors happened during shutdown" + + raise ExceptionGroup(msg, [healthcheck_stop_exc, e2]) from None raise + if healthcheck_stop_exc: + if original_exc: + msg = "Multiple errors happened when starting the bot" + + raise ExceptionGroup(msg, [original_exc, 
logger = logging.getLogger(__name__)
HEALTHCHECK_COG_NAME = "healthcheck"

# Inclusive TCP port range accepted for the healthcheck listener. Port 0 is
# rejected because it would bind an ephemeral port that no probe could target.
_MIN_PORT = 1
_MAX_PORT = 65535


def _parse_healthcheck_port(raw_port: str | None, host: str) -> int:
    """
    Convert the raw ``HEALTHCHECK_PORT`` value into a validated TCP port.

    Validation happens at construction time so that a misconfigured port fails
    immediately instead of surfacing later when the listener is started.

    :param raw_port: Raw value of ``HEALTHCHECK_PORT``; ``None`` when unset.
    :param host: Configured ``HEALTHCHECK_HOST``; used only in error messages.
    :returns: The parsed port number.
    :raises RuntimeError: If the port is unset, not an integer, or out of range.
    """
    if raw_port is None:
        msg = (
            "Environment variable HEALTHCHECK_PORT must be set when "
            f"HEALTHCHECK_HOST is configured (HEALTHCHECK_HOST={host})"
        )
        raise RuntimeError(msg)

    try:
        port = int(raw_port)
    except ValueError as e:
        msg = (
            "Environment variable HEALTHCHECK_PORT must be a valid integer when "
            "HEALTHCHECK_HOST is configured "
            f"(HEALTHCHECK_HOST={host}, HEALTHCHECK_PORT={raw_port})"
        )
        raise RuntimeError(msg) from e

    if not _MIN_PORT <= port <= _MAX_PORT:
        msg = (
            f"Environment variable HEALTHCHECK_PORT must be between {_MIN_PORT} and {_MAX_PORT} when "
            "HEALTHCHECK_HOST is configured "
            f"(HEALTHCHECK_HOST={host}, HEALTHCHECK_PORT={raw_port})"
        )
        raise RuntimeError(msg)

    return port


class HealthcheckCog(discord.Cog, name=HEALTHCHECK_COG_NAME):
    """Cog that owns the lifecycle of the optional HTTP healthcheck server."""

    def __init__(self, bot: discord.Bot) -> None:
        """
        Build the healthcheck server from environment configuration.

        The server is only created when ``HEALTHCHECK_HOST`` is set to a
        non-empty value; otherwise ``healthcheck_server`` stays ``None`` and the
        feature is disabled.

        :param bot: Bot instance handed to the healthcheck server.
        :raises RuntimeError: If a host is configured but the port is missing or invalid.
        """
        self.bot: discord.Bot = bot
        self.healthcheck_server: HealthcheckServer | None = None
        if not HEALTHCHECK_HOST:
            return

        self.healthcheck_server = HealthcheckServer(
            bot,
            host=HEALTHCHECK_HOST,
            port=_parse_healthcheck_port(HEALTHCHECK_PORT_RAW, HEALTHCHECK_HOST),
        )

    @discord.Cog.listener(once=True)
    async def on_connect(self) -> None:
        """Start the healthcheck server, or log that it is disabled."""
        if self.healthcheck_server is None:
            logger.info("Healthcheck server disabled because HEALTHCHECK_HOST is unset/empty")
            return

        await self.healthcheck_server.start()
        logger.info("Healthcheck server started from healthcheck cog")

    async def stop_server(self) -> None:
        """Stop the healthcheck server if one was configured; otherwise do nothing."""
        if self.healthcheck_server is None:
            return
        await self.healthcheck_server.stop()


def setup(bot: discord.Bot) -> None:
    """Register :class:`HealthcheckCog` on ``bot``."""
    bot.add_cog(HealthcheckCog(bot))
-9,3 +9,6 @@ DB_PATH = Path(os.getenv("DB_PATH") or Path("data/database.db")).absolute() DB_PATH.parent.mkdir(parents=True, exist_ok=True) + +HEALTHCHECK_HOST = os.getenv("HEALTHCHECK_HOST") +HEALTHCHECK_PORT_RAW = os.getenv("HEALTHCHECK_PORT") diff --git a/src/runtime_healthcheck.py b/src/runtime_healthcheck.py new file mode 100644 index 0000000..160a0b6 --- /dev/null +++ b/src/runtime_healthcheck.py @@ -0,0 +1,188 @@ +"""Runtime HTTP healthcheck server for DB and Discord readiness probes.""" + +import asyncio +import json +import logging +import math +from contextlib import suppress +from typing import Any + +import discord +from tortoise import Tortoise +from tortoise.exceptions import ConfigurationError, DBConnectionError, OperationalError + +logger = logging.getLogger(__name__) + +_REQUEST_TIMEOUT_SECONDS = 5 +_MAX_HEARTBEAT_LATENCY_SECONDS = 10 + + +class HealthcheckServer: + """Serve HTTP healthcheck responses for runtime dependency status.""" + + def __init__( + self, + bot: discord.Bot, + *, + host: str, + port: int, + path: str = "/health", + ) -> None: + """ + Initialize the healthcheck server. + + :param bot: Discord bot instance used for runtime status checks. + :param host: Interface address for the healthcheck listener. + :param port: TCP port for the healthcheck listener. + :param path: HTTP path that serves health responses. 
+ """ + self.bot: discord.Bot = bot + self.host: str = host + self.port: int = port + self.path: str = path + self._server: asyncio.AbstractServer | None = None + + async def start(self) -> None: + """Start listening for healthcheck HTTP requests.""" + self._server = await asyncio.start_server(self._handle_connection, host=self.host, port=self.port) + logger.info("Healthcheck server listening on http://%s:%s%s", self.host, self.port, self.path) + + async def stop(self) -> None: + """Stop the healthcheck listener if it is running.""" + if self._server is None: + return + + self._server.close() + await self._server.wait_closed() + self._server = None + + async def _handle_connection(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None: + """ + Process a single HTTP request and return a JSON response. + + :param reader: Stream reader for the client connection. + :param writer: Stream writer for the client connection. + """ + response_status = 500 + response_body: dict[str, Any] = {"status": "error"} + + try: + request_line = await asyncio.wait_for(reader.readline(), timeout=_REQUEST_TIMEOUT_SECONDS) + if not request_line: + return + + method, raw_target, _ = request_line.decode("utf-8", errors="replace").strip().split(maxsplit=2) + await self._consume_headers(reader) + + target = raw_target.split("?", maxsplit=1)[0] + if method != "GET": + response_status = 405 + response_body = {"status": "error", "reason": "method_not_allowed"} + elif target != self.path: + response_status = 404 + response_body = {"status": "error", "reason": "not_found"} + else: + response_status, response_body = await self._health_response() + except (UnicodeDecodeError, ValueError): + response_status = 400 + response_body = {"status": "error", "reason": "bad_request"} + except TimeoutError: + response_status = 408 + response_body = {"status": "error", "reason": "request_timeout"} + finally: + writer.write(self._build_response(response_status, response_body)) + with 
suppress(ConnectionError): + await writer.drain() + + writer.close() + with suppress(ConnectionError): + await writer.wait_closed() + + async def _health_response(self) -> tuple[int, dict[str, Any]]: + """Build the current health payload and matching HTTP status code.""" + db_connected = await self._is_db_connected() + discord_connected = self._is_discord_connected() + discord_unrate_limited = self._is_discord_unrate_limited() + discord_heartbeat_ok = self._is_discord_heartbeat_healthy() + + checks = { + "database_connected": db_connected, + "discord_connected": discord_connected, + "discord_no_global_ratelimit": discord_unrate_limited, + "discord_heartbeats_healthy": discord_heartbeat_ok, + } + healthy = all(checks.values()) + return ( + 200 if healthy else 503, + { + "status": "ok" if healthy else "degraded", + "checks": checks, + }, + ) + + async def _is_db_connected(self) -> bool: + """Return whether the configured database connection is responsive.""" + try: + connection = Tortoise.get_connection("default") + await connection.execute_query("SELECT 1") + except (ConfigurationError, DBConnectionError, OperationalError): + return False + return True + + def _is_discord_connected(self) -> bool: + """Return whether the Discord client is ready and not closed.""" + return self.bot.is_ready() and not self.bot.is_closed() + + def _is_discord_unrate_limited(self) -> bool: + """Return whether the Discord websocket is not globally ratelimited.""" + return not self.bot.is_ws_ratelimited() + + def _is_discord_heartbeat_healthy(self) -> bool: + """Return whether Discord heartbeat latencies are within the healthy threshold.""" + if isinstance(self.bot, discord.AutoShardedClient): + return all( + math.isfinite(latency) and 0 <= latency <= _MAX_HEARTBEAT_LATENCY_SECONDS + for _, latency in self.bot.latencies + ) + + return math.isfinite(self.bot.latency) and 0 <= self.bot.latency <= _MAX_HEARTBEAT_LATENCY_SECONDS + + @staticmethod + async def _consume_headers(reader: 
asyncio.StreamReader) -> None: + """ + Read request headers until an empty line is reached. + + :param reader: Stream reader for the client connection. + """ + while True: + line = await asyncio.wait_for(reader.readline(), timeout=_REQUEST_TIMEOUT_SECONDS) + if not line or line in {b"\r\n", b"\n"}: + return + + @staticmethod + def _build_response(status_code: int, body: dict[str, Any]) -> bytes: + """ + Build a raw HTTP JSON response payload. + + :param status_code: HTTP status code to emit. + :param body: Response body content. + :returns: Serialized HTTP response bytes. + """ + status_text = { + 200: "OK", + 400: "Bad Request", + 404: "Not Found", + 405: "Method Not Allowed", + 408: "Request Timeout", + 500: "Internal Server Error", + 503: "Service Unavailable", + }.get(status_code, "Internal Server Error") + payload = json.dumps(body, separators=(",", ":"), sort_keys=True).encode() + headers = ( + f"HTTP/1.1 {status_code} {status_text}\r\n" + "Content-Type: application/json\r\n" + f"Content-Length: {len(payload)}\r\n" + "Connection: close\r\n" + "\r\n" + ).encode() + return headers + payload