From 4a569304754df95173c1993000b6ef67b317e121 Mon Sep 17 00:00:00 2001 From: Haseeb Tariq Date: Tue, 24 Feb 2026 12:13:12 -0800 Subject: [PATCH] OCPBUGS-74151: Wait for revision stability before removing etcd members Previously, the ClusterMemberRemovalController would remove etcd members during revision rollouts, causing cluster degradation when simultaneously deleting multiple control plane machines with the OnDelete strategy. During a revision rollout, etcd members can temporarily appear unhealthy while their pods are reinstalled to the latest revision. This is different from members being indefinitely unhealthy on a stable revision. Additionally, the EtcdEndpointsController pauses during revision rollouts, so when a replacement machine is added and triggers a rollout, the etcd-endpoints configmap won't update. This causes API servers on the old revision to use removed member endpoints, leading to API unavailability. This change adds a revision stability check before allowing member removal, ensuring we only remove members when revisions are stable and unhealthy members are truly unhealthy. This explicitly codifies the 4.17 behavior where the operator waited for all revisions to complete before removing members and lifecycle hooks. Additionally, the ClusterMemberRemovalController now verifies that the live etcd membership matches the configmap before proceeding with member removal, preventing potential issues during rapid member deletion (cherry picked from commit 016873387965eeb73b7a8526f2d9ec788e2caa50) --- pkg/operator/ceohelpers/common.go | 10 ++ .../clustermemberremovalcontroller.go | 64 ++++++++ .../clustermemberremovalcontroller_test.go | 138 ++++++++++++++++++ 3 files changed, 212 insertions(+) diff --git a/pkg/operator/ceohelpers/common.go b/pkg/operator/ceohelpers/common.go index 2ca01abcf1..a6a37af5c3 100644 --- a/pkg/operator/ceohelpers/common.go +++ b/pkg/operator/ceohelpers/common.go @@ -202,6 +202,16 @@ func VotingMemberIPListSet(ctx context.Context, cli etcdcli.EtcdClient) (sets.St return currentVotingMemberIPListSet, nil } +// MemberIPSetFromConfigMap extracts member IP addresses from the etcd-endpoints configmap +// into a set for comparison with live etcd membership +func MemberIPSetFromConfigMap(cm *corev1.ConfigMap) sets.String { + memberIPs := sets.NewString() + for _, ip := range cm.Data { + memberIPs.Insert(ip) + } + return memberIPs +} + // RevisionRolloutInProgress will return true if any node status reports its target revision is different from the current revision and the latest known revision. func RevisionRolloutInProgress(status operatorv1.StaticPodOperatorStatus) bool { latestRevision := status.LatestAvailableRevision diff --git a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go index d20399363e..0693b0d319 100644 --- a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go +++ b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go @@ -112,6 +112,45 @@ func (c *clusterMemberRemovalController) sync(ctx context.Context, syncCtx facto return nil } + // Removing multiple members in the middle of a revision rollout can cause API unavailability + // when simultaneously deleting multiple machines with the controlplanemachineset "OnDelete" strategy, + // i.e we have multiple machines pending deletion and multiple new replacements added + // + // This controller currently has conditions to allow scaling down unhealthy members whose machines + // are pending deletion. However a revision rollout can temporarily make an etcd member seem unhealthy while + // the etcd pod is reinstalled to the latest revision. + // This is different from when the member is indefinitely unhealthy when the revision is stable. + // + // Additionally the EtcdEndpointsController pauses while a revision rollout is in progress + // So initially if the etcd-endpoints configmap is updated from 3->4 when the first replacement machine + // is added to the membership, a revision rollout will start and the configmap won't update in this period. + // But the ClusterMemberRemovalController will still delete a seemingly unhealthy machine during rollout + // The API servers on the old revision will neither see the new replacement etcd endpoint, and will also + // be using a removed member's endpoint. + // + // Moreover the EtcdEndpointsController uses the live etcd membership list to make scale down considerations for etcd quorum so the etcd-endpoints configmap always lags behind it. + // + // So the EtcdEndpointsController skips until the revision is stable so we remove members one at a time and unhealthy members are truly unhealthy + revisionStable, err := ceohelpers.IsRevisionStable(c.operatorClient) + if err != nil { + return fmt.Errorf("couldn't determine stability of revisions: %w", err) + } + if !revisionStable { + klog.V(2).Infof("skipping due to revision in progress") + return nil + } + + // Ensure the live etcd membership matches the etcd-endpoints configmap. + // This prevents removing multiple members in quick succession before the configmap is updated + // and propagated through a revision rollout. + etcdEndpointsUpdated, err := c.isEtcdEndpointsUpdated(ctx) + if err != nil { + return fmt.Errorf("failed to check if etcd endpoints are updated: %w", err) + } + if !etcdEndpointsUpdated { + return nil + } + // only attempt to scale down if the machine API is functional if isFunctional, err := c.machineAPIChecker.IsFunctional(); err != nil { return err @@ -513,3 +552,28 @@ func machineFailureDomainCountByIndex(machines []*machinev1beta1.Machine) map[in } return count } + +// isEtcdEndpointsUpdated checks whether the live etcd membership matches the etcd-endpoints configmap. +// This prevents removing multiple members in quick succession before the configmap is updated +// and propagated through a revision rollout. +func (c *clusterMemberRemovalController) isEtcdEndpointsUpdated(ctx context.Context) (bool, error) { + liveVotingMemberIPs, err := ceohelpers.VotingMemberIPListSet(ctx, c.etcdClient) + if err != nil { + return false, fmt.Errorf("failed to get live voting member IPs: %w", err) + } + + etcdEndpointsConfigMap, err := c.configMapListerForTargetNamespace.Get("etcd-endpoints") + if err != nil { + return false, fmt.Errorf("failed to get etcd-endpoints configmap: %w", err) + } + + configMapMemberIPs := ceohelpers.MemberIPSetFromConfigMap(etcdEndpointsConfigMap) + + if !liveVotingMemberIPs.Equal(configMapMemberIPs) { + klog.V(2).Infof("skipping member removal: live etcd membership differs from etcd-endpoints configmap. Live members: %v, ConfigMap members: %v", + liveVotingMemberIPs.List(), configMapMemberIPs.List()) + return false, nil + } + + return true, nil +} diff --git a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go index f680749af2..f3a7619af2 100644 --- a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go +++ b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go @@ -1292,6 +1292,144 @@ func wellKnownMasterMachines() []runtime.Object { } } +func TestIsEtcdEndpointsUpdated(t *testing.T) { + scenarios := []struct { + name string + initialObjectsForConfigMapTargetNSLister []runtime.Object + initialEtcdMemberList []*etcdserverpb.Member + expectedResult bool + expectError bool + expectedErrorMsg string + }{ + { + name: "live membership equals configmap membership", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + }, + expectedResult: true, + expectError: false, + }, + { + name: "live membership has more members than configmap (scaling up)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + {Name: "m-4", ID: 4, PeerURLs: []string{"https://10.0.139.81:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "live membership has fewer members than configmap (scaling down)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + "m-4": "10.0.139.81", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "live membership differs from configmap (different IPs)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-4", ID: 4, PeerURLs: []string{"https://10.0.139.81:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "configmap not found", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{}, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + }, + expectedResult: false, + expectError: true, + expectedErrorMsg: "failed to get etcd-endpoints configmap", + }, + } + + for _, scenario := range scenarios { + t.Run(scenario.name, func(t *testing.T) { + // setup test data + configMapTargetNSIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + for _, initialObj := range scenario.initialObjectsForConfigMapTargetNSLister { + configMapTargetNSIndexer.Add(initialObj) + } + configMapTargetNSLister := corev1listers.NewConfigMapLister(configMapTargetNSIndexer).ConfigMaps("openshift-etcd") + + fakeEtcdClient, err := etcdcli.NewFakeEtcdClient(scenario.initialEtcdMemberList) + require.NoError(t, err) + + // create controller instance + target := clusterMemberRemovalController{ + etcdClient: fakeEtcdClient, + configMapListerForTargetNamespace: configMapTargetNSLister, + } + + // act + result, err := target.isEtcdEndpointsUpdated(context.TODO()) + + // assert + if scenario.expectError { + require.Error(t, err) + if scenario.expectedErrorMsg != "" { + require.Contains(t, err.Error(), scenario.expectedErrorMsg) + } + } else { + require.NoError(t, err) + require.Equal(t, scenario.expectedResult, result, "isEtcdEndpointsUpdated returned unexpected result") + } + }) + } +} + var wellKnownReplicasCountSet = ` { "controlPlane": {"replicas": 3}