diff --git a/pkg/operator/ceohelpers/common.go b/pkg/operator/ceohelpers/common.go index 2ca01abcf1..a6a37af5c3 100644 --- a/pkg/operator/ceohelpers/common.go +++ b/pkg/operator/ceohelpers/common.go @@ -202,6 +202,16 @@ func VotingMemberIPListSet(ctx context.Context, cli etcdcli.EtcdClient) (sets.St return currentVotingMemberIPListSet, nil } +// MemberIPSetFromConfigMap extracts member IP addresses from the etcd-endpoints configmap +// into a set for comparison with live etcd membership +func MemberIPSetFromConfigMap(cm *corev1.ConfigMap) sets.String { + memberIPs := sets.NewString() + for _, ip := range cm.Data { + memberIPs.Insert(ip) + } + return memberIPs +} + // RevisionRolloutInProgress will return true if any node status reports its target revision is different from the current revision and the latest known revision. func RevisionRolloutInProgress(status operatorv1.StaticPodOperatorStatus) bool { latestRevision := status.LatestAvailableRevision diff --git a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go index d20399363e..0693b0d319 100644 --- a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go +++ b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller.go @@ -112,6 +112,45 @@ func (c *clusterMemberRemovalController) sync(ctx context.Context, syncCtx facto return nil } + // Removing multiple members in the middle of a revision rollout can cause API unavailability + // when simultaneously deleting multiple machines with the controlplanemachineset "OnDelete" strategy, + // i.e we have multiple machines pending deletion and multiple new replacements added + // + // This controller currently has conditions to allow scaling down unhealthy members whose machines + // are pending deletion. However a revision rollout can temporarily make an etcd member seem unhealthy while + // the etcd pod is reinstalled to the latest revision. + // This is different from when the member is indefinitely unhealthy when the revision is stable. + // + // Additionally the EtcdEndpointsController pauses while a revision rollout is in progress + // So initially if the etcd-endpoints configmap is updated from 3->4 when the first replacement machine + // is added to the membership, a revision rollout will start and the configmap won't update in this period. + // But the ClusterMemberRemovalController will still delete a seemingly unhealthy machine during rollout + // The API servers on the old revision will neither see the new replacement etcd endpoint, and will also + // be using a removed member's endpoint. + // + // Moreover the EtcdEndpointsController uses the live etcd membership list to make scale down considerations for etcd quorum so the etcd-endpoints configmap always lags behind it. + // + // So the EtcdEndpointsController skips until the revision is stable so we remove members one at a time and unhealthy members are truly unhealthy + revisionStable, err := ceohelpers.IsRevisionStable(c.operatorClient) + if err != nil { + return fmt.Errorf("couldn't determine stability of revisions: %w", err) + } + if !revisionStable { + klog.V(2).Infof("skipping due to revision in progress") + return nil + } + + // Ensure the live etcd membership matches the etcd-endpoints configmap. + // This prevents removing multiple members in quick succession before the configmap is updated + // and propagated through a revision rollout. + etcdEndpointsUpdated, err := c.isEtcdEndpointsUpdated(ctx) + if err != nil { + return fmt.Errorf("failed to check if etcd endpoints are updated: %w", err) + } + if !etcdEndpointsUpdated { + return nil + } + // only attempt to scale down if the machine API is functional if isFunctional, err := c.machineAPIChecker.IsFunctional(); err != nil { return err @@ -513,3 +552,28 @@ func machineFailureDomainCountByIndex(machines []*machinev1beta1.Machine) map[in } return count } + +// isEtcdEndpointsUpdated checks whether the live etcd membership matches the etcd-endpoints configmap. +// This prevents removing multiple members in quick succession before the configmap is updated +// and propagated through a revision rollout. +func (c *clusterMemberRemovalController) isEtcdEndpointsUpdated(ctx context.Context) (bool, error) { + liveVotingMemberIPs, err := ceohelpers.VotingMemberIPListSet(ctx, c.etcdClient) + if err != nil { + return false, fmt.Errorf("failed to get live voting member IPs: %w", err) + } + + etcdEndpointsConfigMap, err := c.configMapListerForTargetNamespace.Get("etcd-endpoints") + if err != nil { + return false, fmt.Errorf("failed to get etcd-endpoints configmap: %w", err) + } + + configMapMemberIPs := ceohelpers.MemberIPSetFromConfigMap(etcdEndpointsConfigMap) + + if !liveVotingMemberIPs.Equal(configMapMemberIPs) { + klog.V(2).Infof("skipping member removal: live etcd membership differs from etcd-endpoints configmap. Live members: %v, ConfigMap members: %v", + liveVotingMemberIPs.List(), configMapMemberIPs.List()) + return false, nil + } + + return true, nil +} diff --git a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go index f680749af2..f3a7619af2 100644 --- a/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go +++ b/pkg/operator/clustermemberremovalcontroller/clustermemberremovalcontroller_test.go @@ -1292,6 +1292,144 @@ func wellKnownMasterMachines() []runtime.Object { } } +func TestIsEtcdEndpointsUpdated(t *testing.T) { + scenarios := []struct { + name string + initialObjectsForConfigMapTargetNSLister []runtime.Object + initialEtcdMemberList []*etcdserverpb.Member + expectedResult bool + expectError bool + expectedErrorMsg string + }{ + { + name: "live membership equals configmap membership", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + }, + expectedResult: true, + expectError: false, + }, + { + name: "live membership has more members than configmap (scaling up)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + {Name: "m-4", ID: 4, PeerURLs: []string{"https://10.0.139.81:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "live membership has fewer members than configmap (scaling down)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + "m-4": "10.0.139.81", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-3", ID: 3, PeerURLs: []string{"https://10.0.139.80:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "live membership differs from configmap (different IPs)", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{ + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-endpoints", Namespace: "openshift-etcd"}, + Data: map[string]string{ + "m-1": "10.0.139.78", + "m-2": "10.0.139.79", + "m-3": "10.0.139.80", + }, + }, + }, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + {Name: "m-2", ID: 2, PeerURLs: []string{"https://10.0.139.79:1234"}}, + {Name: "m-4", ID: 4, PeerURLs: []string{"https://10.0.139.81:1234"}}, + }, + expectedResult: false, + expectError: false, + }, + { + name: "configmap not found", + initialObjectsForConfigMapTargetNSLister: []runtime.Object{}, + initialEtcdMemberList: []*etcdserverpb.Member{ + {Name: "m-1", ID: 1, PeerURLs: []string{"https://10.0.139.78:1234"}}, + }, + expectedResult: false, + expectError: true, + expectedErrorMsg: "failed to get etcd-endpoints configmap", + }, + } + + for _, scenario := range scenarios { + t.Run(scenario.name, func(t *testing.T) { + // setup test data + configMapTargetNSIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) + for _, initialObj := range scenario.initialObjectsForConfigMapTargetNSLister { + configMapTargetNSIndexer.Add(initialObj) + } + configMapTargetNSLister := corev1listers.NewConfigMapLister(configMapTargetNSIndexer).ConfigMaps("openshift-etcd") + + fakeEtcdClient, err := etcdcli.NewFakeEtcdClient(scenario.initialEtcdMemberList) + require.NoError(t, err) + + // create controller instance + target := clusterMemberRemovalController{ + etcdClient: fakeEtcdClient, + configMapListerForTargetNamespace: configMapTargetNSLister, + } + + // act + result, err := target.isEtcdEndpointsUpdated(context.TODO()) + + // assert + if scenario.expectError { + require.Error(t, err) + if scenario.expectedErrorMsg != "" { + require.Contains(t, err.Error(), scenario.expectedErrorMsg) + } + } else { + require.NoError(t, err) + require.Equal(t, scenario.expectedResult, result, "isEtcdEndpointsUpdated returned unexpected result") + } + }) + } +} + var wellKnownReplicasCountSet = ` { "controlPlane": {"replicas": 3}