From fe6c60315938c9856ff2cbdc125dc84bf3f38670 Mon Sep 17 00:00:00 2001 From: Alex Harford Date: Tue, 25 Nov 2025 13:37:59 -0800 Subject: [PATCH 1/2] Include prometheus and alertmanager status in monitor status * Include prometheus and alertmanager status in monitor status The monitor should be unavailable if there is a prometheus or alertmanager instance that is unavailable. This handles the case where the prometheus install failed, and the statefulsets haven't been created yet. * monitor: Only check prometheus / alertmanager Available status --- pkg/controller/monitor/monitor_controller.go | 45 ++++++ .../monitor/monitor_controller_test.go | 136 +++++++++++++++++- pkg/controller/utils/utils.go | 26 ++++ 3 files changed, 205 insertions(+), 2 deletions(-) diff --git a/pkg/controller/monitor/monitor_controller.go b/pkg/controller/monitor/monitor_controller.go index cf63f241c4..7593e2a077 100644 --- a/pkg/controller/monitor/monitor_controller.go +++ b/pkg/controller/monitor/monitor_controller.go @@ -22,6 +22,7 @@ import ( crdv1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -430,6 +431,50 @@ func (r *ReconcileMonitor) Reconcile(ctx context.Context, request reconcile.Requ } } + p, err := utils.GetPrometheus(ctx, r.client) + if err != nil { + r.status.SetDegraded(operatorv1.ResourceReadError, "An error occurred trying to retrieve the Prometheus status", err, reqLogger) + return reconcile.Result{}, err + } + + // A nil result means the resource does not exist. An existing resource that is not Available is reported as degraded, not returned as a reconcile error. + if p != nil { + available := monitoringv1.ConditionFalse + + for _, cond := range p.Status.Conditions { + if cond.Type == monitoringv1.Available { + available = cond.Status + } + } + + if available != monitoringv1.ConditionTrue { + r.status.SetDegraded(operatorv1.ResourceNotReady, "Prometheus component is not available", nil, reqLogger) + return 
reconcile.Result{}, nil + } + } + + am, err := utils.GetAlertmanager(ctx, r.client) + if err != nil { + r.status.SetDegraded(operatorv1.ResourceReadError, "An error occurred trying to retrieve the Alertmanager status", err, reqLogger) + return reconcile.Result{}, err + } + + // A nil result means the resource does not exist. An existing resource that is not Available is reported as degraded, not returned as a reconcile error. + if am != nil { + available := monitoringv1.ConditionFalse + + for _, cond := range am.Status.Conditions { + if cond.Type == monitoringv1.Available { + available = cond.Status + } + } + + if available != monitoringv1.ConditionTrue { + r.status.SetDegraded(operatorv1.ResourceNotReady, "Alertmanager component is not available", nil, reqLogger) + return reconcile.Result{}, nil + } + } + // Tell the status manager that we're ready to monitor the resources we've told it about and receive statuses. r.status.ReadyToMonitor() diff --git a/pkg/controller/monitor/monitor_controller_test.go b/pkg/controller/monitor/monitor_controller_test.go index 6960bcea18..04edcb8f51 100644 --- a/pkg/controller/monitor/monitor_controller_test.go +++ b/pkg/controller/monitor/monitor_controller_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2024 Tigera, Inc. All rights reserved. +// Copyright (c) 2021-2025 Tigera, Inc. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -68,10 +68,14 @@ var _ = Describe("Monitor controller tests", func() { Expect(apis.AddToScheme(scheme)).NotTo(HaveOccurred()) Expect(appsv1.SchemeBuilder.AddToScheme(scheme)).NotTo(HaveOccurred()) Expect(rbacv1.SchemeBuilder.AddToScheme(scheme)).NotTo(HaveOccurred()) + Expect(monitoringv1.AddToScheme(scheme)).NotTo(HaveOccurred()) // Create a client that will have a crud interface of k8s objects. ctx = context.Background() - cli = ctrlrfake.DefaultFakeClientBuilder(scheme).Build() + cli = ctrlrfake.DefaultFakeClientBuilder(scheme). + WithStatusSubresource(&monitoringv1.Prometheus{}). + WithStatusSubresource(&monitoringv1.Alertmanager{}). 
+ Build() // Create an object we can use throughout the test to do the monitor reconcile loops. mockStatus = &status.MockStatus{} @@ -85,6 +89,7 @@ var _ = Describe("Monitor controller tests", func() { mockStatus.On("ReadyToMonitor") mockStatus.On("RemoveDeployments", mock.Anything) mockStatus.On("RemoveCertificateSigningRequests", common.TigeraPrometheusNamespace) + mockStatus.On("SetDegraded", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return() mockStatus.On("SetMetaData", mock.Anything).Return() // Create an object we can use throughout the test to do the monitor reconcile loops. @@ -135,6 +140,133 @@ var _ = Describe("Monitor controller tests", func() { r.tierWatchReady.MarkAsReady() }) + Context("prometheus resources", func() { + BeforeEach(func() { + // Add the Prometheus and Alertmanager instances + prom := &monitoringv1.Prometheus{ + ObjectMeta: metav1.ObjectMeta{ + Name: monitor.CalicoNodePrometheus, + Namespace: common.TigeraPrometheusNamespace, + }, + } + Expect(cli.Create(ctx, prom)).To(Succeed()) + + prom.Status = monitoringv1.PrometheusStatus{ + Conditions: []monitoringv1.Condition{ + { + Type: monitoringv1.Available, + Status: monitoringv1.ConditionTrue, + }, + { + Type: monitoringv1.Reconciled, + Status: monitoringv1.ConditionTrue, + }, + }, + } + Expect(cli.Status().Update(ctx, prom)).To(Succeed()) + + alertManager := &monitoringv1.Alertmanager{ + ObjectMeta: metav1.ObjectMeta{ + Name: monitor.CalicoNodeAlertmanager, + Namespace: common.TigeraPrometheusNamespace, + }, + } + Expect(cli.Create(ctx, alertManager)).To(Succeed()) + + alertManager.Status = monitoringv1.AlertmanagerStatus{ + Conditions: []monitoringv1.Condition{ + { + Type: monitoringv1.Available, + Status: monitoringv1.ConditionTrue, + }, + { + Type: monitoringv1.Reconciled, + Status: monitoringv1.ConditionTrue, + }, + }, + } + Expect(cli.Status().Update(ctx, alertManager)).To(Succeed()) + }) + + It("should be ready if the prometheus resource is available", func() { + _, err 
:= r.Reconcile(ctx, reconcile.Request{}) + Expect(err).ShouldNot(HaveOccurred()) + + mockStatus.AssertNotCalled(GinkgoT(), "SetDegraded", + operatorv1.ResourceNotReady, + mock.Anything, + mock.Anything, + mock.Anything, + ) + }) + + It("should be ready if the alertmanager resource is available", func() { + _, err := r.Reconcile(ctx, reconcile.Request{}) + Expect(err).ShouldNot(HaveOccurred()) + + mockStatus.AssertNotCalled(GinkgoT(), "SetDegraded", + operatorv1.ResourceNotReady, + mock.Anything, + mock.Anything, + mock.Anything, + ) + }) + + It("should degrade if the prometheus resource isn't available", func() { + prom := &monitoringv1.Prometheus{} + Expect(cli.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodePrometheus, Namespace: common.TigeraPrometheusNamespace}, prom)).NotTo(HaveOccurred()) + + prom.Status.Conditions = []monitoringv1.Condition{ + { + Type: monitoringv1.Available, + Status: monitoringv1.ConditionFalse, + }, + { + Type: monitoringv1.Reconciled, + Status: monitoringv1.ConditionTrue, + }, + } + Expect(cli.Status().Update(ctx, prom)).To(Succeed()) + + _, err := r.Reconcile(ctx, reconcile.Request{}) + Expect(err).ShouldNot(HaveOccurred()) + + mockStatus.AssertCalled(GinkgoT(), "SetDegraded", + operatorv1.ResourceNotReady, + "Prometheus component is not available", + mock.Anything, + mock.Anything, + ) + }) + + It("should degrade if the alertmanager resource isn't available", func() { + alertManager := &monitoringv1.Alertmanager{} + Expect(cli.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodeAlertmanager, Namespace: common.TigeraPrometheusNamespace}, alertManager)).NotTo(HaveOccurred()) + + alertManager.Status.Conditions = []monitoringv1.Condition{ + { + Type: monitoringv1.Available, + Status: monitoringv1.ConditionFalse, + }, + { + Type: monitoringv1.Reconciled, + Status: monitoringv1.ConditionTrue, + }, + } + Expect(cli.Status().Update(ctx, alertManager)).To(Succeed()) + + _, err := r.Reconcile(ctx, reconcile.Request{}) + 
Expect(err).ShouldNot(HaveOccurred()) + + mockStatus.AssertCalled(GinkgoT(), "SetDegraded", + operatorv1.ResourceNotReady, + "Alertmanager component is not available", + mock.Anything, + mock.Anything, + ) + }) + }) + Context("controller reconciliation", func() { var ( am = &monitoringv1.Alertmanager{} diff --git a/pkg/controller/utils/utils.go b/pkg/controller/utils/utils.go index 563bae5191..5ed404b503 100644 --- a/pkg/controller/utils/utils.go +++ b/pkg/controller/utils/utils.go @@ -25,6 +25,7 @@ import ( esv1 "github.com/elastic/cloud-on-k8s/v2/pkg/apis/elasticsearch/v1" "github.com/elastic/cloud-on-k8s/v2/pkg/utils/stringsutil" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" "github.com/go-logr/logr" @@ -58,6 +59,7 @@ import ( "github.com/tigera/operator/pkg/ctrlruntime" "github.com/tigera/operator/pkg/render" "github.com/tigera/operator/pkg/render/logstorage/eck" + "github.com/tigera/operator/pkg/render/monitor" ) const ( @@ -859,6 +861,34 @@ func GetElasticsearch(ctx context.Context, c client.Client) (*esv1.Elasticsearch return &es, nil } +// GetAlertmanager fetches the Alertmanager named monitor.CalicoNodeAlertmanager from common.TigeraPrometheusNamespace. +// It returns (nil, nil) when the resource does not exist and (nil, err) for any other lookup failure. +func GetAlertmanager(ctx context.Context, c client.Client) (*monitoringv1.Alertmanager, error) { + a := monitoringv1.Alertmanager{} + err := c.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodeAlertmanager, Namespace: common.TigeraPrometheusNamespace}, &a) + if err != nil { + if errors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + return &a, nil +} + +// GetPrometheus fetches the Prometheus named monitor.CalicoNodePrometheus from common.TigeraPrometheusNamespace. +// It returns (nil, nil) when the resource does not exist and (nil, err) for any other lookup failure. +func GetPrometheus(ctx context.Context, c client.Client) (*monitoringv1.Prometheus, error) { + p := monitoringv1.Prometheus{} + err := c.Get(ctx, client.ObjectKey{Name: monitor.CalicoNodePrometheus, Namespace: common.TigeraPrometheusNamespace}, &p) + if err != nil { + if errors.IsNotFound(err) { + return nil, nil + } + return nil, err + } + return &p, nil +} + // AddKubeProxyWatch creates a watch on the kube-proxy DaemonSet. 
func AddKubeProxyWatch(c ctrlruntime.Controller) error { ds := &appsv1.DaemonSet{ From feeb574ec95ccf3af6a4af3d30ef17f0d0d55230 Mon Sep 17 00:00:00 2001 From: Alex Harford Date: Tue, 6 Jan 2026 13:58:10 -0800 Subject: [PATCH 2/2] Update copyright year to 2026 --- pkg/controller/monitor/monitor_controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/monitor/monitor_controller_test.go b/pkg/controller/monitor/monitor_controller_test.go index 04edcb8f51..2c7faa586a 100644 --- a/pkg/controller/monitor/monitor_controller_test.go +++ b/pkg/controller/monitor/monitor_controller_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2021-2025 Tigera, Inc. All rights reserved. +// Copyright (c) 2021-2026 Tigera, Inc. All rights reserved. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.