diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 682f2e32135..a1066bb7550 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -16,6 +16,7 @@ package main import ( "bytes" "context" + "crypto/tls" "errors" "fmt" "io" @@ -40,6 +41,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + "go.uber.org/atomic" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/notifier" @@ -1063,3 +1065,95 @@ remote_write: // 3*shardUpdateDuration to allow for the resharding logic to run. }, 30*time.Second, time.Second) } + +func cpFile(t *testing.T, dst, src string) { + t.Helper() + data, err := os.ReadFile(src) + require.NoError(t, err) + require.NoError(t, os.WriteFile(dst, data, 0o644)) +} + +// TestClientTLSCertRotationWithoutCAFile checks that scrapes keep working after +// a client cert rotation when no ca_file is configured (cert_file + key_file only). +// Regression test for: https://github.com/prometheus/prometheus/issues/16622. +func TestClientTLSCertRotationWithoutCAFile(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + + // The target starts serving an extra metric after client cert rotation. + // If the scrape recovers, the new series should get ingested. + var clientCertRotated atomic.Bool + target := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + fmt.Fprintln(w, "test_up 1") + if clientCertRotated.Load() { + fmt.Fprintln(w, "test_after_rotation 1") + } + })) + target.TLS = &tls.Config{ + ClientAuth: tls.RequireAnyClientCert, + } + target.StartTLS() + defer target.Close() + + clientCertFile := filepath.Join(tmpDir, "client.cer") + clientKeyFile := filepath.Join(tmpDir, "client.key") + cpFile(t, clientCertFile, filepath.Join("..", "..", "scrape", "testdata", "client.cer")) + cpFile(t, clientKeyFile, filepath.Join("..", "..", "scrape", "testdata", "client.key")) + + port := testutil.RandomUnprivilegedPort(t) + configFile := filepath.Join(tmpDir, "prometheus.yml") + require.NoError(t, os.WriteFile(configFile, fmt.Appendf(nil, ` +scrape_configs: + - job_name: target + scrape_interval: 1s + scheme: https + static_configs: + - targets: ['%s'] + tls_config: + cert_file: %s + key_file: %s + insecure_skip_verify: true +`, target.Listener.Addr().String(), clientCertFile, clientKeyFile), 0o644)) + + prom := prometheusCommandWithLogging(t, configFile, port, + "--storage.tsdb.path="+filepath.Join(tmpDir, "data")) + require.NoError(t, prom.Start()) + + promURL := "http://localhost:" + strconv.Itoa(port) + const headSeriesCreated = "prometheus_tsdb_head_series_created_total" + + getHeadSeriesCreated := func() (float64, error) { + r, err := http.Get(promURL + "/metrics") + if err != nil { + return 0, err + } + defer r.Body.Close() + b, _ := io.ReadAll(r.Body) + return getMetricValue(t, bytes.NewReader(b), model.MetricTypeCounter, headSeriesCreated) + } + + // Wait for at least one scrape to ingest series. + var seriesCreatedBeforeRotation float64 + require.Eventually(t, func() bool { + v, err := getHeadSeriesCreated() + if err != nil { + return false + } + seriesCreatedBeforeRotation = v + return seriesCreatedBeforeRotation > 0 + }, startupTime, 500*time.Millisecond) + + // Overwrite client cert files (content doesn't matter) to trigger a + // transport rebuild. + cpFile(t, clientCertFile, filepath.Join("..", "..", "scrape", "testdata", "server.cer")) + cpFile(t, clientKeyFile, filepath.Join("..", "..", "scrape", "testdata", "server.key")) + clientCertRotated.Store(true) + + require.Eventually(t, func() bool { + v, err := getHeadSeriesCreated() + if err != nil { + return false + } + return v > seriesCreatedBeforeRotation + }, 5*time.Second, 1*time.Second, "scrape should keep working after cert rotation") +}