@@ -15,6 +15,7 @@ import (
1515 apierrors "k8s.io/apimachinery/pkg/api/errors"
1616 "k8s.io/apimachinery/pkg/api/resource"
1717 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+ "k8s.io/apimachinery/pkg/util/sets"
1819 "k8s.io/client-go/rest"
1920 "k8s.io/client-go/util/retry"
2021 "k8s.io/utils/ptr"
@@ -35,6 +36,11 @@ import (
3536const (
3637 workerMachinePoolName = "worker" // pool name passed to waitForMachines/waitForNodes by the scale and autoscale tests
3738 infraMachinePoolName = "infra" // pool name used by TestNewMachinePool; also compared against the node label "openshift.io/machine-type"
39+
40+ capiClusterKey = "machine.openshift.io/cluster-api-cluster" // Machine label compared against the ClusterDeployment's InfraID
41+ capiMachineTypeKey = "machine.openshift.io/cluster-api-machine-type" // Machine label compared against the pool name
42+ hiveMachinePoolKey = "hive.openshift.io/machine-pool" // MachineSet label compared against the pool name
43+ openshiftMachineAPI = "openshift-machine-api" // namespace in which waitForNodes lists Machines
3844)
3945
4046func TestScaleMachinePool (t * testing.T ) {
@@ -52,27 +58,23 @@ func TestScaleMachinePool(t *testing.T) {
5258 }
5359
5460 c := common .MustGetClient ()
55- machinePrefix , err := machineNamePrefix (cd , workerMachinePoolName )
56- require .NoError (t , err , "cannot determine machine name prefix" )
5761
5862 // Scale down
59- err = retry .RetryOnConflict (retry .DefaultRetry , func () error {
63+ err := retry .RetryOnConflict (retry .DefaultRetry , func () error {
6064 pool := common .GetMachinePool (c , cd , workerMachinePoolName )
6165 require .NotNilf (t , pool , "worker machine pool does not exist: %s" , workerMachinePoolName )
6266
6367 logger = logger .WithField ("pool" , pool .Name )
64- logger .Infof ("expected Machine name prefix: %s" , machinePrefix )
65-
6668 logger .Info ("scaling pool to 1 replicas" )
6769 pool .Spec .Replicas = ptr .To (int64 (1 ))
6870 return c .Update (context .TODO (), pool )
6971 })
7072 require .NoError (t , err , "cannot update worker machine pool to reduce replicas" )
7173
72- err = waitForMachines (logger , cfg , cd , machinePrefix , 1 )
74+ err = waitForMachines (logger , cfg , cd , workerMachinePoolName , 1 )
7375 require .NoError (t , err , "timed out waiting for machines to be scaled down" )
7476
75- err = waitForNodes (logger , cfg , cd , machinePrefix , 1 )
77+ err = waitForNodes (logger , cfg , cd , workerMachinePoolName , 1 )
7678 require .NoError (t , err , "timed out waiting for nodes to be scaled down" )
7779
7880 // Scale up
@@ -86,10 +88,10 @@ func TestScaleMachinePool(t *testing.T) {
8688 })
8789 require .NoError (t , err , "cannot update worker machine pool to increase replicas" )
8890
89- err = waitForMachines (logger , cfg , cd , machinePrefix , 3 )
91+ err = waitForMachines (logger , cfg , cd , workerMachinePoolName , 3 )
9092 require .NoError (t , err , "timed out waiting for machines to be scaled up" )
9193
92- err = waitForNodes (logger , cfg , cd , machinePrefix , 3 )
94+ err = waitForNodes (logger , cfg , cd , workerMachinePoolName , 3 )
9395 require .NoError (t , err , "timed out waiting for nodes to be scaled up" )
9496}
9597
@@ -162,17 +164,11 @@ func TestNewMachinePool(t *testing.T) {
162164 err := c .Create (context .TODO (), infraMachinePool )
163165 require .NoError (t , err , "cannot create infra machine pool" )
164166
165- machinePrefix , err := machineNamePrefix (cd , infraMachinePoolName )
166- require .NoError (t , err , "cannot find/calculate machine name prefix" )
167- logger .Infof ("expected Machine name prefix: %s" , machinePrefix )
168-
169- // Wait for machines to be created
170167 t .Logf ("Waiting for 3 infra machines to be created" )
171- err = waitForMachines (logger , cfg , cd , machinePrefix , 3 )
168+ err = waitForMachines (logger , cfg , cd , infraMachinePoolName , 3 )
172169 require .NoError (t , err , "timed out waiting for machines to be created" )
173170
174- err = waitForNodes (logger , cfg , cd , machinePrefix , 3 ,
175- // Ensure that labels were applied to the nodes
171+ err = waitForNodes (logger , cfg , cd , infraMachinePoolName , 3 ,
176172 func (node * corev1.Node ) bool {
177173 if machineType := node .Labels ["openshift.io/machine-type" ]; machineType != infraMachinePoolName {
178174 t .Logf ("Did not find expected label in node" )
@@ -205,7 +201,7 @@ func TestNewMachinePool(t *testing.T) {
205201 cfg ,
206202 func (machineSets []* machinev1.MachineSet ) bool {
207203 for _ , ms := range machineSets {
208- if strings . HasPrefix ( ms .Name , machinePrefix ) {
204+ if ms .Labels [ hiveMachinePoolKey ] == infraMachinePoolName {
209205 return false
210206 }
211207 }
@@ -277,9 +273,6 @@ func TestAutoscalingMachinePool(t *testing.T) {
277273 require .NoError (t , err , "cannot update worker machine pool to reduce replicas" )
278274 logger = logger .WithField ("pool" , pool .Name )
279275
280- machinePrefix , err := machineNamePrefix (cd , workerMachinePoolName )
281- require .NoError (t , err , "cannot find/calculate machine name prefix" )
282-
283276 logger .Info ("lowering autoscaler delay so scaling down happens faster" )
284277 clusterAutoscaler := & autoscalingv1.ClusterAutoscaler {}
285278poll:
@@ -368,6 +361,7 @@ poll:
368361 },
369362 }},
370363 SecurityContext : & corev1.PodSecurityContext {
364+ RunAsUser : ptr .To (int64 (1000 )),
371365 RunAsNonRoot : ptr .To (true ),
372366 SeccompProfile : & corev1.SeccompProfile {
373367 Type : corev1 .SeccompProfileTypeRuntimeDefault ,
@@ -380,9 +374,9 @@ poll:
380374 err = rc .Create (context .TODO (), busyboxDeployment )
381375 require .NoError (t , err , "cannot create busybox deployment" )
382376
383- err = waitForMachines (logger , cfg , cd , machinePrefix , maxReplicas )
377+ err = waitForMachines (logger , cfg , cd , workerMachinePoolName , maxReplicas )
384378 require .NoError (t , err , "timed out waiting for machines to be created" )
385- err = waitForNodes (logger , cfg , cd , machinePrefix , maxReplicas )
379+ err = waitForNodes (logger , cfg , cd , workerMachinePoolName , maxReplicas )
386380 require .NoError (t , err , "timed out waiting for nodes to be created" )
387381
388382 // Scale down
@@ -392,9 +386,9 @@ poll:
392386 logger .Info ("deleting busybox deployment to relieve cpu pressure and scale down machines" )
393387 err = rc .Delete (context .TODO (), busyboxDeployment , client .PropagationPolicy (metav1 .DeletePropagationForeground ))
394388 require .NoError (t , err , "could not delete busybox deployment" )
395- err = waitForMachines (logger , cfg , cd , machinePrefix , minReplicas )
389+ err = waitForMachines (logger , cfg , cd , workerMachinePoolName , minReplicas )
396390 require .NoError (t , err , "timed out waiting for machine count" )
397- err = waitForNodes (logger , cfg , cd , machinePrefix , minReplicas )
391+ err = waitForNodes (logger , cfg , cd , workerMachinePoolName , minReplicas )
398392 require .NoError (t , err , "timed out waiting for nodes to be created" )
399393
400394 logger .Info ("disabling autoscaling" )
@@ -407,45 +401,57 @@ poll:
407401 return c .Update (context .TODO (), pool )
408402 })
409403 require .NoError (t , err , "cannot update worker machine pool to turn off auto-scaling" )
410- err = waitForMachines (logger , cfg , cd , machinePrefix , 3 )
404+ err = waitForMachines (logger , cfg , cd , workerMachinePoolName , 3 )
411405 require .NoError (t , err , "timed out waiting for machines to be created" )
412- err = waitForNodes (logger , cfg , cd , machinePrefix , 3 )
406+ err = waitForNodes (logger , cfg , cd , workerMachinePoolName , 3 )
413407 require .NoError (t , err , "timed out waiting for nodes to be created" )
414408}
415409
// waitForMachines waits, via common.WaitForMachines, until the number of
// Machines belonging to the given pool equals expectedReplicas, and returns
// whatever error common.WaitForMachines returns.
//
// In this diff the '-' lines are the removed implementation, which matched
// Machines by name prefix; the '+' lines are the replacement, which matches
// Machines by label (capiClusterKey == InfraID and capiMachineTypeKey == poolName).
//
// NOTE(review): cd.Spec.ClusterMetadata is dereferenced without a nil check; if
// that field is a pointer this panics for a ClusterDeployment that has no
// metadata yet — confirm callers only pass installed clusters.
416- func waitForMachines (logger log.FieldLogger , cfg * rest.Config , cd * hivev1.ClusterDeployment , machinePrefix string , expectedReplicas int ) error {
417- logger .Infof ("waiting for %d machines with prefix '%s'" , expectedReplicas , machinePrefix )
410+ func waitForMachines (logger log.FieldLogger , cfg * rest.Config , cd * hivev1.ClusterDeployment , poolName string , expectedReplicas int ) error {
411+ infraID := cd .Spec .ClusterMetadata .InfraID
412+ logger .Infof ("waiting for %d machines (pool %s, infraID %s)" , expectedReplicas , poolName , infraID )
418413 lastCount := 0 // last count that was logged; used to log only when the count changes
419414 return common .WaitForMachines (cfg , func (machines []* machinev1.Machine ) bool {
420415 count := 0
421416 for _ , m := range machines {
422- if strings . HasPrefix ( m . Name , machinePrefix ) {
417+ if m . Labels [ capiClusterKey ] == infraID && m . Labels [ capiMachineTypeKey ] == poolName {
423418 count ++
424419 }
425420 }
426421 if count != lastCount {
427- logger .Infof ("found %d machines with prefix '%s' " , count , machinePrefix )
422+ logger .Infof ("found %d machines for pool %s " , count , poolName )
428423 lastCount = count
429424 }
430425 return count == expectedReplicas // exact match required: more machines than expected does not satisfy the wait
431- }, 20 * time .Minute )
426+ }, 30 * time .Minute ) // duration handed to common.WaitForMachines (presumably its timeout — confirm); raised from 20 to 30 minutes
432427}
433428
434- func waitForNodes (logger log.FieldLogger , cfg * rest.Config , cd * hivev1.ClusterDeployment , machinePrefix string , expectedReplicas int , extraChecks ... func (node * corev1.Node ) bool ) error {
435- logger .Infof ("waiting for %d nodes with machine annotation prefix '%s'" , expectedReplicas , machinePrefix )
429+ func waitForNodes (logger log.FieldLogger , cfg * rest.Config , cd * hivev1.ClusterDeployment , poolName string , expectedReplicas int , extraChecks ... func (node * corev1.Node ) bool ) error {
430+ infraID := cd .Spec .ClusterMetadata .InfraID
431+ logger .Infof ("waiting for %d nodes (pool %s)" , expectedReplicas , poolName )
432+ rc := common .MustGetClientFromConfig (cfg )
436433 return common .WaitForNodes (cfg , func (nodes []* corev1.Node ) bool {
434+ machineList := & machinev1.MachineList {}
435+ err := rc .List (context .TODO (), machineList , client .InNamespace (openshiftMachineAPI ),
436+ client.MatchingLabels {capiClusterKey : infraID , capiMachineTypeKey : poolName })
437+ if err != nil {
438+ return false
439+ }
440+ poolMachineNames := sets .New [string ]()
441+ for i := range machineList .Items {
442+ poolMachineNames .Insert (machineList .Items [i ].Name )
443+ }
437444 poolNodes := []* corev1.Node {}
438445 for _ , n := range nodes {
439446 if n .Annotations == nil {
440447 continue
441448 }
442449 machineAnnotation := n .Annotations ["machine.openshift.io/machine" ]
443- name := strings .Split (machineAnnotation , "/" )
444- if len (name ) < 2 {
450+ parts := strings .Split (machineAnnotation , "/" )
451+ if len (parts ) < 2 {
445452 continue
446453 }
447- machineName := name [1 ]
448- if strings .HasPrefix (machineName , machinePrefix ) {
454+ if poolMachineNames .Has (parts [1 ]) {
449455 poolNodes = append (poolNodes , n )
450456 }
451457 }
@@ -463,9 +469,5 @@ func waitForNodes(logger log.FieldLogger, cfg *rest.Config, cd *hivev1.ClusterDe
463469 }
464470
465471 return true
466- }, 15 * time .Minute )
467- }
468-
469- func machineNamePrefix (cd * hivev1.ClusterDeployment , poolName string ) (string , error ) {
470- return fmt .Sprintf ("%s-%s-" , cd .Spec .ClusterMetadata .InfraID , poolName ), nil
472+ }, 30 * time .Minute )
471473}
0 commit comments