Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
7fec9ae
Initial plan
Copilot Apr 14, 2026
2071551
feat: add HTTP healthcheck endpoint for bot runtime checks
Copilot Apr 14, 2026
3146615
chore: polish healthcheck shutdown exception naming
Copilot Apr 14, 2026
7543163
Address PR feedback on healthcheck imports and probe checks
Copilot Apr 14, 2026
0823976
Refine healthcheck probes per PR feedback
Copilot Apr 14, 2026
e0ba7f9
Simplify import
ToothyDev Apr 14, 2026
dd82e39
Tighten heartbeat latency health threshold
Copilot Apr 14, 2026
8bdd1db
Add healthcheck module and method docstrings
Copilot Apr 14, 2026
eb4504d
Expand healthcheck module docstring scope
Copilot Apr 14, 2026
863b088
Align module docstring ratelimit terminology
Copilot Apr 14, 2026
0324a43
Refactor healthcheck startup into cog and add Docker HEALTHCHECK
Copilot Apr 18, 2026
9943694
Parameterize Docker healthcheck and cog name constant
Copilot Apr 18, 2026
671bea8
Switch Docker HEALTHCHECK to wget probe
Copilot Apr 18, 2026
372b9e5
Apply healthcheck review changes for Docker defaults and endpoint path
Copilot Apr 18, 2026
e9781b5
Allow disabling healthcheck via empty host and remove app-side defaults
Copilot Apr 19, 2026
1f61ca7
Refine healthcheck env typing and clarify disable docs
Copilot Apr 19, 2026
85443ee
Clarify env variable name in healthcheck port error
Copilot Apr 19, 2026
e7d5a49
Include configured host value in healthcheck port error
Copilot Apr 19, 2026
96b148c
Validate HEALTHCHECK_PORT in cog with explicit runtime errors
Copilot Apr 19, 2026
0d8ec97
Deduplicate healthcheck port validation error message
Copilot Apr 19, 2026
e2e2cfb
Differentiate missing vs invalid HEALTHCHECK_PORT errors
Copilot Apr 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
TOKEN="YOUR BOT TOKEN HERE"
TOKEN="YOUR BOT TOKEN HERE"
HEALTHCHECK_HOST="127.0.0.1"
HEALTHCHECK_PORT="8080"
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ FROM python:${PYTHON_VERSION}-slim-bookworm AS python-base

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV HEALTHCHECK_HOST=127.0.0.1
ENV HEALTHCHECK_PORT=8080

RUN pip install uv

Expand All @@ -19,6 +21,8 @@ ENV PYTHONUNBUFFERED=1

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends wget && rm -rf /var/lib/apt/lists/*

RUN adduser -u 8192 --disabled-password --gecos "" appuser && chown -R appuser /app

COPY --from=python-base --chown=appuser /app/requirements.txt ./
Expand All @@ -28,4 +32,8 @@ RUN pip install -r requirements.txt
COPY src/ ./src
USER appuser

EXPOSE 8080

# An empty HEALTHCHECK_HOST disables the in-app healthcheck server, so report healthy
# instead of probing an endpoint that is intentionally not listening.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 CMD ["sh", "-c", "[ -z \"${HEALTHCHECK_HOST}\" ] || wget -q -T 3 -O /dev/null \"http://${HEALTHCHECK_HOST}:${HEALTHCHECK_PORT}/health\""]

CMD ["python", "-m", "src"]
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,22 @@ Versa is a simple utility Discord bot, with the main goal of being open source a

#### ⚠ **Further support with self-hosting will not be provided.** ⚠

### Healthcheck endpoint

The bot exposes an HTTP healthcheck endpoint for deployment platforms (such as Coolify).

- Method/path: `GET /health`
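- Docker default bind: `127.0.0.1:8080` (set via `ENV` in `Dockerfile`). Because this is the loopback interface, the endpoint is reachable only from inside the container (which is what the image's `HEALTHCHECK` uses); set `HEALTHCHECK_HOST=0.0.0.0` to probe it from outside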
- Config via env vars:
- `HEALTHCHECK_HOST`
- `HEALTHCHECK_PORT`
- Leave `HEALTHCHECK_HOST` unset, or set it to an empty string, to disable the healthcheck server

The endpoint returns:
- `200` when the database is responsive, Discord is connected, Discord shard heartbeat latency is healthy, and the Discord gateway websocket is not rate-limited
- `503` when any check is failing

# License

This project is licensed under AGPL-3.0. Forks and redistributions must remain open-source. See the LICENSE file for
further info
further info
22 changes: 20 additions & 2 deletions src/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import discord

from src import log_setup
from src.cogs.healthcheck import HEALTHCHECK_COG_NAME, HealthcheckCog
from src.config import TOKEN
from src.database import init_db, shutdown_db

from .config import TOKEN

log_setup.setup_logging(logging.INFO)
logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,14 +40,32 @@ async def start() -> None:
except Exception as e: # noqa: BLE001
original_exc = e
finally:
healthcheck_stop_exc = None
healthcheck_cog = bot.get_cog(HEALTHCHECK_COG_NAME)
try:
if isinstance(healthcheck_cog, HealthcheckCog):
await healthcheck_cog.stop_server()
except Exception as healthcheck_exc: # noqa: BLE001
healthcheck_stop_exc = healthcheck_exc

try:
await shutdown_db()
except Exception as e2:
if original_exc:
msg = "Multiple errors happened when starting the bot"

raise ExceptionGroup(msg, [original_exc, e2]) from None
if healthcheck_stop_exc:
msg = "Multiple errors happened during shutdown"

raise ExceptionGroup(msg, [healthcheck_stop_exc, e2]) from None
raise
if healthcheck_stop_exc:
if original_exc:
msg = "Multiple errors happened when starting the bot"

raise ExceptionGroup(msg, [original_exc, healthcheck_stop_exc]) from None
raise healthcheck_stop_exc
if original_exc:
raise original_exc

Expand Down
56 changes: 56 additions & 0 deletions src/cogs/healthcheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging

import discord

from src.config import HEALTHCHECK_HOST, HEALTHCHECK_PORT_RAW
from src.runtime_healthcheck import HealthcheckServer

# Module-level logger for healthcheck cog lifecycle messages.
logger = logging.getLogger(__name__)
# Name the cog is registered under; src/__main__.py looks the cog up by this name on shutdown.
HEALTHCHECK_COG_NAME = "healthcheck"


class HealthcheckCog(discord.Cog, name=HEALTHCHECK_COG_NAME):
    """
    Cog that owns the lifecycle of the runtime healthcheck HTTP server.

    The server is configured from ``HEALTHCHECK_HOST`` and ``HEALTHCHECK_PORT``. When the host is
    unset or empty the feature is disabled and ``healthcheck_server`` stays ``None``.
    """

    def __init__(self, bot: discord.Bot) -> None:
        """
        Validate the healthcheck configuration and prepare (but do not start) the server.

        :param bot: Bot instance handed to the healthcheck server for status checks.
        :raises RuntimeError: If a host is configured but ``HEALTHCHECK_PORT`` is missing,
            is not an integer, or is outside the valid TCP port range.
        """
        self.bot: discord.Bot = bot
        self.healthcheck_server: HealthcheckServer | None = None
        if not HEALTHCHECK_HOST:
            # Unset or empty host means the healthcheck server is disabled.
            return

        self.healthcheck_server = HealthcheckServer(
            bot,
            host=HEALTHCHECK_HOST,
            port=self._resolve_port(),
        )

    @staticmethod
    def _resolve_port() -> int:
        """
        Parse and validate ``HEALTHCHECK_PORT`` for the configured host.

        :returns: The port as an integer in the range 0-65535.
        :raises RuntimeError: If the port is missing, not an integer, or out of range.
        """
        if HEALTHCHECK_PORT_RAW is None:
            msg = (
                "Environment variable HEALTHCHECK_PORT must be set when "
                f"HEALTHCHECK_HOST is configured (HEALTHCHECK_HOST={HEALTHCHECK_HOST})"
            )
            raise RuntimeError(msg)

        try:
            healthcheck_port = int(HEALTHCHECK_PORT_RAW)
        except ValueError as e:
            msg = (
                "Environment variable HEALTHCHECK_PORT must be a valid integer when "
                "HEALTHCHECK_HOST is configured "
                f"(HEALTHCHECK_HOST={HEALTHCHECK_HOST}, HEALTHCHECK_PORT={HEALTHCHECK_PORT_RAW})"
            )
            raise RuntimeError(msg) from e

        # Fail at load time with a clear message rather than with an opaque bind error
        # once the listener is started.
        if not 0 <= healthcheck_port <= 65535:
            msg = (
                "Environment variable HEALTHCHECK_PORT must be between 0 and 65535 when "
                "HEALTHCHECK_HOST is configured "
                f"(HEALTHCHECK_HOST={HEALTHCHECK_HOST}, HEALTHCHECK_PORT={HEALTHCHECK_PORT_RAW})"
            )
            raise RuntimeError(msg)

        return healthcheck_port

    @discord.Cog.listener(once=True)
    async def on_connect(self) -> None:
        """Start the healthcheck server; the listener is registered with ``once=True``."""
        if self.healthcheck_server is None:
            logger.info("Healthcheck server disabled because HEALTHCHECK_HOST is unset/empty")
            return

        await self.healthcheck_server.start()
        logger.info("Healthcheck server started from healthcheck cog")

    async def stop_server(self) -> None:
        """Stop the healthcheck server if one was configured; otherwise do nothing."""
        if self.healthcheck_server is None:
            return
        await self.healthcheck_server.stop()


def setup(bot: discord.Bot) -> None:
    """
    Create the healthcheck cog and add it to the bot.

    :param bot: Bot instance the cog is attached to.
    """
    healthcheck_cog = HealthcheckCog(bot)
    bot.add_cog(healthcheck_cog)
3 changes: 3 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@

# Absolute path of the database file; the DB_PATH environment variable overrides the
# default of data/database.db. The parent directory is created as an import side effect.
DB_PATH = Path(os.getenv("DB_PATH") or Path("data/database.db")).absolute()
DB_PATH.parent.mkdir(parents=True, exist_ok=True)

# Healthcheck listener settings, read verbatim from the environment (None when unset).
# The healthcheck cog treats an unset/empty host as "disabled" and validates the raw port itself.
HEALTHCHECK_HOST = os.getenv("HEALTHCHECK_HOST")
HEALTHCHECK_PORT_RAW = os.getenv("HEALTHCHECK_PORT")
188 changes: 188 additions & 0 deletions src/runtime_healthcheck.py
Comment thread
ToothyDev marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Runtime HTTP healthcheck server for DB and Discord readiness probes."""

import asyncio
import json
import logging
import math
from contextlib import suppress
from typing import Any

import discord
from tortoise import Tortoise
from tortoise.exceptions import ConfigurationError, DBConnectionError, OperationalError

logger = logging.getLogger(__name__)

# Seconds to wait for request data from a client before it is answered with 408 Request Timeout.
_REQUEST_TIMEOUT_SECONDS = 5
# Largest heartbeat latency, in seconds, that is still reported as healthy.
_MAX_HEARTBEAT_LATENCY_SECONDS = 10


class HealthcheckServer:
"""Serve HTTP healthcheck responses for runtime dependency status."""

def __init__(
self,
bot: discord.Bot,
*,
host: str,
port: int,
path: str = "/health",
) -> None:
"""
Initialize the healthcheck server.

:param bot: Discord bot instance used for runtime status checks.
:param host: Interface address for the healthcheck listener.
:param port: TCP port for the healthcheck listener.
:param path: HTTP path that serves health responses.
"""
self.bot: discord.Bot = bot
self.host: str = host
self.port: int = port
self.path: str = path
self._server: asyncio.AbstractServer | None = None

async def start(self) -> None:
"""Start listening for healthcheck HTTP requests."""
self._server = await asyncio.start_server(self._handle_connection, host=self.host, port=self.port)
logger.info("Healthcheck server listening on http://%s:%s%s", self.host, self.port, self.path)

async def stop(self) -> None:
"""Stop the healthcheck listener if it is running."""
if self._server is None:
return

self._server.close()
await self._server.wait_closed()
self._server = None

async def _handle_connection(self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
"""
Process a single HTTP request and return a JSON response.

:param reader: Stream reader for the client connection.
:param writer: Stream writer for the client connection.
"""
response_status = 500
response_body: dict[str, Any] = {"status": "error"}

try:
request_line = await asyncio.wait_for(reader.readline(), timeout=_REQUEST_TIMEOUT_SECONDS)
if not request_line:
return

method, raw_target, _ = request_line.decode("utf-8", errors="replace").strip().split(maxsplit=2)
await self._consume_headers(reader)

target = raw_target.split("?", maxsplit=1)[0]
if method != "GET":
response_status = 405
response_body = {"status": "error", "reason": "method_not_allowed"}
elif target != self.path:
response_status = 404
response_body = {"status": "error", "reason": "not_found"}
else:
response_status, response_body = await self._health_response()
except (UnicodeDecodeError, ValueError):
response_status = 400
response_body = {"status": "error", "reason": "bad_request"}
except TimeoutError:
response_status = 408
response_body = {"status": "error", "reason": "request_timeout"}
finally:
writer.write(self._build_response(response_status, response_body))
with suppress(ConnectionError):
await writer.drain()

writer.close()
with suppress(ConnectionError):
await writer.wait_closed()

async def _health_response(self) -> tuple[int, dict[str, Any]]:
"""Build the current health payload and matching HTTP status code."""
db_connected = await self._is_db_connected()
discord_connected = self._is_discord_connected()
discord_unrate_limited = self._is_discord_unrate_limited()
discord_heartbeat_ok = self._is_discord_heartbeat_healthy()

checks = {
"database_connected": db_connected,
"discord_connected": discord_connected,
"discord_no_global_ratelimit": discord_unrate_limited,
"discord_heartbeats_healthy": discord_heartbeat_ok,
}
healthy = all(checks.values())
return (
200 if healthy else 503,
{
"status": "ok" if healthy else "degraded",
"checks": checks,
},
)

async def _is_db_connected(self) -> bool:
"""Return whether the configured database connection is responsive."""
try:
connection = Tortoise.get_connection("default")
await connection.execute_query("SELECT 1")
except (ConfigurationError, DBConnectionError, OperationalError):
return False
return True

def _is_discord_connected(self) -> bool:
"""Return whether the Discord client is ready and not closed."""
return self.bot.is_ready() and not self.bot.is_closed()

def _is_discord_unrate_limited(self) -> bool:
"""Return whether the Discord websocket is not globally ratelimited."""
return not self.bot.is_ws_ratelimited()
Comment thread
ToothyDev marked this conversation as resolved.

def _is_discord_heartbeat_healthy(self) -> bool:
"""Return whether Discord heartbeat latencies are within the healthy threshold."""
if isinstance(self.bot, discord.AutoShardedClient):
return all(
math.isfinite(latency) and 0 <= latency <= _MAX_HEARTBEAT_LATENCY_SECONDS
for _, latency in self.bot.latencies
)

return math.isfinite(self.bot.latency) and 0 <= self.bot.latency <= _MAX_HEARTBEAT_LATENCY_SECONDS

@staticmethod
async def _consume_headers(reader: asyncio.StreamReader) -> None:
"""
Read request headers until an empty line is reached.

:param reader: Stream reader for the client connection.
"""
while True:
line = await asyncio.wait_for(reader.readline(), timeout=_REQUEST_TIMEOUT_SECONDS)
if not line or line in {b"\r\n", b"\n"}:
return

@staticmethod
def _build_response(status_code: int, body: dict[str, Any]) -> bytes:
"""
Build a raw HTTP JSON response payload.

:param status_code: HTTP status code to emit.
:param body: Response body content.
:returns: Serialized HTTP response bytes.
"""
status_text = {
200: "OK",
400: "Bad Request",
404: "Not Found",
405: "Method Not Allowed",
408: "Request Timeout",
500: "Internal Server Error",
503: "Service Unavailable",
}.get(status_code, "Internal Server Error")
payload = json.dumps(body, separators=(",", ":"), sort_keys=True).encode()
headers = (
f"HTTP/1.1 {status_code} {status_text}\r\n"
"Content-Type: application/json\r\n"
f"Content-Length: {len(payload)}\r\n"
"Connection: close\r\n"
"\r\n"
).encode()
return headers + payload
Loading