diff --git a/VERSION b/VERSION index 62c3a60..23aee0e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v1.1.4-dev +v1.1.5-dev diff --git a/logger/logger.go b/logger/logger.go index f7db761..e9654c7 100644 --- a/logger/logger.go +++ b/logger/logger.go @@ -141,7 +141,10 @@ func Middleware(logger *slog.Logger, shouldPrint bool) func(next http.Handler) h metrics.InFlightGauge.Dec() path := r.URL.Path - if ww.Status() == http.StatusNotFound { + status := ww.Status() + // ww.Status() returns 0 when WriteHeader was never explicitly called (chi unmatched routes). + // Treat 0 the same as 404 to prevent path scanners from polluting metric cardinality. + if status == http.StatusNotFound || status == 0 { // prevent path scanners from polluting logs and messing up path / endpoint cardinality. // use a separate, dedicated key that we are not aggregating against. Keeps memory down. logger = logger.With("path_high_cardinality", path) diff --git a/logger/logger_test.go b/logger/logger_test.go index 050b453..d239afe 100644 --- a/logger/logger_test.go +++ b/logger/logger_test.go @@ -3,10 +3,76 @@ package logger import ( "bytes" "log/slog" + "net/http" + "net/http/httptest" "strings" "testing" ) +// TestMiddlewareCardinalityProtection verifies that unmatched routes — whether they +// produce an explicit 404 or no WriteHeader call at all (chi returns status 0 for +// unmatched routes) — have their paths redacted in logs and metrics to prevent +// scanner traffic from causing a cardinality explosion. +func TestMiddlewareCardinalityProtection(t *testing.T) { + tests := []struct { + name string + handler http.HandlerFunc + path string + wantRedacted bool + }{ + { + name: "known path passes through unchanged", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }, + path: "/api/users", + wantRedacted: false, + }, + { + name: "explicit 404 triggers redaction", + handler: func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }, + path: "/wp-admin/login.php", + wantRedacted: true, + }, + { + name: "status 0 (unmatched chi route, WriteHeader never called) triggers redaction", + handler: func(w http.ResponseWriter, r *http.Request) { + // intentionally write nothing — chi returns 0 for unmatched routes + }, + path: "/.env", + wantRedacted: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var buf bytes.Buffer + log := slog.New(slog.NewJSONHandler(&buf, nil)) + mw := Middleware(log, true) + + req := httptest.NewRequest(http.MethodGet, tt.path, nil) + rr := httptest.NewRecorder() + mw(http.HandlerFunc(tt.handler)).ServeHTTP(rr, req) + + output := buf.String() + if tt.wantRedacted { + if !strings.Contains(output, "path_high_cardinality") { + t.Errorf("expected path_high_cardinality in log output, got: %s", output) + } + if !strings.Contains(output, "redacted for cardinality protection") { + t.Errorf("expected redacted path in log output, got: %s", output) + } + } else { + if strings.Contains(output, "path_high_cardinality") { + t.Errorf("did not expect path_high_cardinality in log output, got: %s", output) + } + } + }) + } +} + // TestPrintable verifies that the logger instance can be coerced over to a different // interface. slog doesn't provide Print, but the chi logger middleware needs something // that matches that interface. ergo, make sure we can coerce the logger over