diff --git a/.jules/sentinel.md b/.jules/sentinel.md
index c704035b..46f03836 100644
--- a/.jules/sentinel.md
+++ b/.jules/sentinel.md
@@ -57,3 +57,7 @@
 **Vulnerability:** Database driver exceptions can echo DSN fragments, query parameters, or assignment-style secrets after connection failures, leaking plaintext passwords through snapshot error messages and queue logs.
 **Learning:** Redacting only the literal DSN is not enough. Error messages may contain decoded, percent-encoded, query-string, or `password=`/`api_key=` style forms of the same secret.
 **Prevention:** Sanitize snapshot job errors before persisting or re-raising them, and raise sanitized exceptions with `from None` so Python exception chaining does not reattach the original secret-bearing exception.
+## 2026-06-27 - DSN Password Redaction Leakage via Encoding Variations
+**Vulnerability:** The DSN redactor (`redact_dsn_error_message`) previously failed to catch and replace variations of URL-encoded passwords (like `+` vs `%20` or double-encoded forms) from driver error messages.
+**Learning:** `urllib.parse.urlsplit().password` returns the password exactly as it is written in the URL; it is not percent-decoded. If the original DSN contains an encoded password (e.g. `p+ass` or `p%20ass`) and the database driver logs it in decoded or re-encoded form in the error message, naive string replacement using only the raw value and its `quote()` variation will miss it. Furthermore, any attempt to avoid over-redaction by skipping passwords shorter than a certain length (e.g., `< 4`) creates a critical leakage vulnerability for short passwords.
+**Prevention:** To reliably redact passwords from error messages, you must first decode the raw string fully to a base representation (`unquote_plus`), and then generate all possible logging variations (the decoded string, `quote()`, and `quote_plus()`) for the redaction candidates set. Never skip redacting short passwords; over-redaction is always preferable to a credential leak.
diff --git a/backend/app/dsn_redaction.py b/backend/app/dsn_redaction.py
index 3342c3ae..196b09a6 100644
--- a/backend/app/dsn_redaction.py
+++ b/backend/app/dsn_redaction.py
@@ -20,16 +20,24 @@ def _password_candidates_from_dsn(dsn: str) -> set[str]:
     candidates: set[str] = set()
     parsed = urlsplit(dsn)
 
+    def add_variations(pw: str) -> None:
+        if not pw:
+            return
+
+        decoded = unquote_plus(pw)
+        candidates.add(pw)
+        candidates.add(decoded)
+        candidates.add(quote(decoded, safe=""))
+        candidates.add(quote_plus(decoded, safe=""))
+
     if parsed.password:
-        candidates.add(parsed.password)
-        candidates.add(quote(parsed.password, safe=""))
+        add_variations(parsed.password)
 
     if "@" in parsed.netloc:
         userinfo = parsed.netloc.rsplit("@", 1)[0]
         if ":" in userinfo:
             raw_password = userinfo.split(":", 1)[1]
-            candidates.add(raw_password)
-            candidates.add(unquote(raw_password))
+            add_variations(raw_password)
 
     for part in parsed.query.split("&"):
         key, sep, raw_value = part.partition("=")
@@ -37,19 +45,19 @@ def _password_candidates_from_dsn(dsn: str) -> set[str]:
             continue
         if not _SECRET_KEY_PATTERN.search(unquote_plus(key)):
             continue
-        decoded_value = unquote_plus(raw_value)
-        candidates.add(raw_value)
-        candidates.add(decoded_value)
-        candidates.add(quote(decoded_value, safe=""))
-        candidates.add(quote_plus(decoded_value, safe=""))
+        add_variations(raw_value)
 
     return {candidate for candidate in candidates if candidate}
 
 
 def redact_dsn_error_message(error_message: str, dsn: str) -> str:
     """Redact DSN-derived secrets from a driver error message."""
-
     redacted = error_message
+
+    # Apply naive replacements for all candidates.
+    # While this may cause over-redaction for very short passwords, it is
+    # the safest approach to ensure no secrets leak in error messages.
     for secret in sorted(_password_candidates_from_dsn(dsn), key=len, reverse=True):
         redacted = redacted.replace(secret, "***")
+
     return _SECRET_ASSIGNMENT_PATTERN.sub(r"\g***", redacted)