diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 3e30a3ebe..23719d296 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,6 +7,13 @@ updates: commit-message: prefix: "chore" + - package-ecosystem: "docker" + directory: "/docker" + schedule: + interval: "weekly" + commit-message: + prefix: "chore" + - package-ecosystem: "gomod" directory: "/" schedule: diff --git a/.github/workflows/chart.yml b/.github/workflows/chart.yml index e01ef19ec..182ea3cfc 100644 --- a/.github/workflows/chart.yml +++ b/.github/workflows/chart.yml @@ -27,12 +27,12 @@ jobs: needs: export-registry runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: true fetch-depth: 0 - name: Publish Helm chart to GitHub Pages - uses: stefanprodan/helm-gh-pages@v1.7.0 + uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # v1.7.0 with: token: ${{ secrets.GITHUB_TOKEN }} charts_dir: charts @@ -44,10 +44,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Login to GitHub Container Registry - uses: docker/login-action@v3.6.0 + uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd9027ca2..866369a99 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Detect No-op Changes id: noop - uses: fkirc/skip-duplicate-actions@v5.3.1 + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} do_not_skip: '["workflow_dispatch", "schedule", "push"]' @@ -36,12 +36,12 @@ jobs: if: needs.detect-noop.outputs.noop != 'true' steps: - name: Set up Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Ginkgo CLI run: | @@ -76,7 +76,7 @@ jobs: KUBEFLEET_CI_TEST_RUNNER_NAME: 'ginkgo' - name: Upload Codecov report - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0 with: ## Repository upload token - get it from codecov.io. Required only for private repositories token: ${{ secrets.CODECOV_TOKEN }} @@ -111,12 +111,12 @@ jobs: if: needs.detect-noop.outputs.noop != 'true' steps: - name: Set up Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Move Docker data directory to /mnt # The default storage device on GitHub-hosted runners is running low during e2e tests. @@ -184,7 +184,7 @@ jobs: - name: Upload logs if: always() - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 with: name: e2e-logs-${{ matrix.customized-settings }} path: test/e2e/logs-${{ matrix.customized-settings }}/ diff --git a/.github/workflows/code-lint.yml b/.github/workflows/code-lint.yml index 17602a79c..36adee70f 100644 --- a/.github/workflows/code-lint.yml +++ b/.github/workflows/code-lint.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Detect No-op Changes id: noop - uses: fkirc/skip-duplicate-actions@v5.3.1 + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} do_not_skip: '["workflow_dispatch", "schedule", "push"]' @@ -37,12 +37,12 @@ jobs: steps: - name: Setup Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Checkout - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: true @@ -58,12 +58,12 @@ jobs: steps: - name: Set up Go ${{ env.GO_VERSION }} - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: golangci-lint run: make lint @@ -76,10 +76,10 @@ jobs: steps: - name: Check out code - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Helm - uses: azure/setup-helm@v5 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5 with: version: v3.17.0 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2f47f189e..ab5b97a4e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -38,11 +38,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v4 + uses: github/codeql-action/autobuild@c10b8064de6f491fea524254123dbe5e09572f13 # v4 # â„šī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4 diff --git a/.github/workflows/markdown-lint.yml b/.github/workflows/markdown-lint.yml index d1ddd51d0..2d96457c5 100644 --- a/.github/workflows/markdown-lint.yml +++ b/.github/workflows/markdown-lint.yml @@ -10,8 +10,8 @@ jobs: markdown-link-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.2 - - uses: tcort/github-action-markdown-link-check@v1 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: tcort/github-action-markdown-link-check@e7c7a18363c842693fadde5d41a3bd3573a7a225 # v1 with: # this will only show errors in the output use-quiet-mode: 'yes' diff --git a/.github/workflows/pr-title-lint.yml b/.github/workflows/pr-title-lint.yml index b7fb5ee63..1178161c0 100644 --- a/.github/workflows/pr-title-lint.yml +++ b/.github/workflows/pr-title-lint.yml @@ -17,7 +17,7 @@ jobs: check: runs-on: ubuntu-latest steps: - - uses: thehanimo/pr-title-checker@v1.4.3 + - uses: thehanimo/pr-title-checker@7fbfe05602bdd86f926d3fb3bccb6f3aed43bc70 # v1.4.3 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} pass_on_octokit_error: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e69de29bb..3e43ff244 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -0,0 +1,79 @@ +name: Release Images + +on: + push: + tags: + - "v*.*.*" + workflow_dispatch: + inputs: + tag: + description: "Release tag (e.g., v1.0.0)" + required: true + type: string + +permissions: + contents: read + packages: write + +env: + REGISTRY: ghcr.io + HUB_AGENT_IMAGE_NAME: hub-agent + MEMBER_AGENT_IMAGE_NAME: member-agent + REFRESH_TOKEN_IMAGE_NAME: refresh-token + GO_VERSION: "1.25.8" + +jobs: + export-registry: + uses: ./.github/workflows/setup-release.yml + with: + tag: ${{ inputs.tag || github.ref_name }} + + build-and-publish: + needs: export-registry + env: + REGISTRY: ${{ needs.export-registry.outputs.registry }} + TAG: ${{ needs.export-registry.outputs.tag }} + runs-on: ubuntu-latest + steps: + - name: Set up Go ${{ env.GO_VERSION }} + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: ${{ needs.export-registry.outputs.tag }} + + - name: Login to ghcr.io + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push images with tag ${{ env.TAG }} + run: | + make push + + - name: Tag and push images without v prefix + env: + VERSION: ${{ needs.export-registry.outputs.version }} + run: | + for IMAGE in ${{ env.HUB_AGENT_IMAGE_NAME }} ${{ env.MEMBER_AGENT_IMAGE_NAME }} ${{ env.REFRESH_TOKEN_IMAGE_NAME }}; do + docker buildx imagetools create \ + --tag "${{ env.REGISTRY }}/${IMAGE}:${VERSION}" \ + "${{ env.REGISTRY }}/${IMAGE}:${{ env.TAG }}" + done + + - name: Verify images + env: + VERSION: ${{ needs.export-registry.outputs.version }} + run: | + echo "✅ Published images:" + echo " - ${{ env.REGISTRY }}/${{ env.HUB_AGENT_IMAGE_NAME }}:${{ env.TAG }}" + echo " - ${{ env.REGISTRY }}/${{ env.HUB_AGENT_IMAGE_NAME }}:${VERSION}" + echo " - ${{ env.REGISTRY }}/${{ env.MEMBER_AGENT_IMAGE_NAME }}:${{ env.TAG }}" + echo " - ${{ env.REGISTRY }}/${{ env.MEMBER_AGENT_IMAGE_NAME }}:${VERSION}" + echo " - ${{ env.REGISTRY }}/${{ env.REFRESH_TOKEN_IMAGE_NAME }}:${{ env.TAG }}" + echo " - ${{ env.REGISTRY }}/${{ env.REFRESH_TOKEN_IMAGE_NAME }}:${VERSION}" diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index b5cf2b8a6..7f9bd13de 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -39,12 +39,12 @@ jobs: runs-on: ubuntu-latest #Latest tag points to the latest LTS release of Ubuntu per docker hub steps: - name: Set up Go ${{ env.GO_VERSION }} - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Checkout code - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Login to ${{ env.REGISTRY }} uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 @@ -64,7 +64,7 @@ jobs: TAG: ${{ env.IMAGE_VERSION }} - name: Scan ${{ env.REGISTRY }}/${{ env.HUB_AGENT_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # v0.35.0 with: image-ref: ${{ env.REGISTRY }}/${{ env.HUB_AGENT_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} format: 'table' @@ -80,7 +80,7 @@ jobs: - name: Scan ${{ env.REGISTRY }}/${{ env.MEMBER_AGENT_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # v0.35.0 with: image-ref: ${{ env.REGISTRY }}/${{ env.MEMBER_AGENT_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} format: 'table' @@ -95,7 +95,7 @@ jobs: TRIVY_DB_REPOSITORY: mcr.microsoft.com/mirror/ghcr/aquasecurity/trivy-db - name: Scan ${{ env.REGISTRY }}/${{ env.REFRESH_TOKEN_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # v0.35.0 with: image-ref: ${{ env.REGISTRY }}/${{ env.REFRESH_TOKEN_IMAGE_NAME }}:${{ env.IMAGE_VERSION }} format: 'table' diff --git a/.github/workflows/upgrade.yml b/.github/workflows/upgrade.yml index b3e3150d4..8bcd6e0fd 100644 --- a/.github/workflows/upgrade.yml +++ b/.github/workflows/upgrade.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Detect No-op Changes id: noop - uses: fkirc/skip-duplicate-actions@v5.3.1 + uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1 with: github_token: ${{ secrets.GITHUB_TOKEN }} do_not_skip: '["workflow_dispatch", "schedule", "push"]' @@ -39,12 +39,12 @@ jobs: if: needs.detect-noop.outputs.noop != 'true' steps: - name: Set up Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: # Fetch the history of all branches and tags. # This is needed for the test suite to switch between releases. @@ -141,12 +141,12 @@ jobs: if: needs.detect-noop.outputs.noop != 'true' steps: - name: Set up Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: # Fetch the history of all branches and tags. # This is needed for the test suite to switch between releases. @@ -243,12 +243,12 @@ jobs: if: needs.detect-noop.outputs.noop != 'true' steps: - name: Set up Go - uses: actions/setup-go@v6 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 with: go-version: ${{ env.GO_VERSION }} - name: Check out code into the Go module directory - uses: actions/checkout@v6.0.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: # Fetch the history of all branches and tags. # This is needed for the test suite to switch between releases. diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md index 12663dffb..11eb8ffed 100644 --- a/charts/hub-agent/README.md +++ b/charts/hub-agent/README.md @@ -34,8 +34,7 @@ helm install hub-agent kubefleet/hub-agent --namespace fleet-system --create-nam ### Local Installation from Source ```console -# Helm install with fleet-system namespace already created -helm install hub-agent ./charts/hub-agent/ +helm install hub-agent ./charts/hub-agent/ --namespace fleet-system --create-namespace ``` ### Installation with cert-manager diff --git a/charts/hub-agent/templates/namespace.yaml b/charts/hub-agent/templates/namespace.yaml deleted file mode 100644 index 77db5f9f6..000000000 --- a/charts/hub-agent/templates/namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: {{ .Values.namespace }} diff --git a/charts/member-agent/README.md b/charts/member-agent/README.md index ccbcd1bc4..6a25c6333 100644 --- a/charts/member-agent/README.md +++ b/charts/member-agent/README.md @@ -34,10 +34,7 @@ helm install member-agent kubefleet/member-agent --namespace fleet-system --crea ### From Local Source ```console -# Go to `charts` folder inside the repo -cd /fleet/charts -# Helm install -helm install member-agent member-agent/ +helm install member-agent ./charts/member-agent/ --namespace fleet-system --create-namespace ``` _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ diff --git a/charts/member-agent/templates/namespace.yaml b/charts/member-agent/templates/namespace.yaml deleted file mode 100644 index c248747dc..000000000 --- a/charts/member-agent/templates/namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: {{ .Values.namespace}} diff --git a/cmd/hubagent/main.go b/cmd/hubagent/main.go index 8d7a0c341..c41407496 100644 --- a/cmd/hubagent/main.go +++ b/cmd/hubagent/main.go @@ -198,6 +198,14 @@ func main() { exitWithErrorFunc() } + // Add webhook server readiness check to ensure the pod is not marked ready until + // the webhook HTTPS listener is actually serving. This uses controller-runtime's + // built-in StartedChecker which verifies the server is listening on the port. + if err := mgr.AddReadyzCheck("webhook-server", mgr.GetWebhookServer().StartedChecker()); err != nil { + klog.ErrorS(err, "unable to set up webhook server readiness check") + exitWithErrorFunc() + } + // When using cert-manager, add a readiness check to ensure CA bundles are injected before marking ready. // This prevents the pod from accepting traffic before cert-manager has populated the webhook CA bundles, // which would cause webhook calls to fail. diff --git a/pkg/controllers/updaterun/execution.go b/pkg/controllers/updaterun/execution.go index 5daab888c..3db55c3e6 100644 --- a/pkg/controllers/updaterun/execution.go +++ b/pkg/controllers/updaterun/execution.go @@ -173,7 +173,13 @@ func (r *Reconciler) executeUpdatingStage( } // The cluster needs to be processed. clusterStartedCond := meta.FindStatusCondition(clusterStatus.Conditions, string(placementv1beta1.ClusterUpdatingConditionStarted)) - binding := toBeUpdatedBindingsMap[clusterStatus.ClusterName] + binding, exists := toBeUpdatedBindingsMap[clusterStatus.ClusterName] + if !exists || binding == nil { + missingBindingErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the binding for cluster `%s` in stage `%s` is not found in the toBeUpdatedBindings map", clusterStatus.ClusterName, updatingStageStatus.StageName)) + klog.ErrorS(missingBindingErr, "Cannot find the binding for the cluster in the updating stage", "cluster", clusterStatus.ClusterName, "stage", updatingStageStatus.StageName, "updateRun", updateRunRef) + clusterUpdateErrors = append(clusterUpdateErrors, fmt.Errorf("%w: %s", errStagedUpdatedAborted, missingBindingErr.Error())) + continue + } if !condition.IsConditionStatusTrue(clusterStartedCond, updateRun.GetGeneration()) { // The cluster has not started updating yet. if !isBindingSyncedWithClusterStatus(resourceSnapshotName, updateRun, binding, clusterStatus) { diff --git a/pkg/controllers/updaterun/execution_test.go b/pkg/controllers/updaterun/execution_test.go index 97cbfce72..cde44d330 100644 --- a/pkg/controllers/updaterun/execution_test.go +++ b/pkg/controllers/updaterun/execution_test.go @@ -524,6 +524,43 @@ func TestExecuteUpdatingStage_Error(t *testing.T) { wantErr: errors.New("simulated update error"), wantWaitTime: 0, }, + { + name: "missing binding in map lookup - nil pointer guard", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "test-stage", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + }, + }, + }, + }, + UpdateStrategySnapshot: &placementv1beta1.UpdateStrategySpec{ + Stages: []placementv1beta1.StageConfig{ + { + Name: "test-stage", + MaxConcurrency: &intstr.IntOrString{Type: intstr.Int, IntVal: 1}, + }, + }, + }, + }, + }, + bindings: nil, // No bindings provided, so cluster-1 will not be found in the map. + wantErr: errors.New("the binding for cluster `cluster-1` in stage `test-stage` is not found in the toBeUpdatedBindings map"), + wantAbortErr: true, + wantWaitTime: 0, + }, { name: "binding preemption", updateRun: &placementv1beta1.ClusterStagedUpdateRun{ diff --git a/pkg/controllers/updaterun/stop.go b/pkg/controllers/updaterun/stop.go index f36604db6..eab8ea088 100644 --- a/pkg/controllers/updaterun/stop.go +++ b/pkg/controllers/updaterun/stop.go @@ -48,7 +48,8 @@ func (r *Reconciler) stop( markUpdateRunStopping(updateRun) if updatingStageIndex < len(updateRunStatus.StagesStatus) { - return r.stopUpdatingStage(updateRun, updatingStageIndex, toBeUpdatedBindings) + updatingStageStatus = &updateRunStatus.StagesStatus[updatingStageIndex] + return r.stopUpdatingStage(updateRun, updatingStageStatus, toBeUpdatedBindings) } // All the stages have finished, stop the delete stage. finished, stopErr = r.stopDeleteStage(updateRun, toBeDeletedBindings) @@ -58,11 +59,9 @@ func (r *Reconciler) stop( // stopUpdatingStage stops the updating stage by letting the updating bindings finish and not starting new updates. func (r *Reconciler) stopUpdatingStage( updateRun placementv1beta1.UpdateRunObj, - updatingStageIndex int, + updatingStageStatus *placementv1beta1.StageUpdatingStatus, toBeUpdatedBindings []placementv1beta1.BindingObj, ) (bool, time.Duration, error) { - updateRunStatus := updateRun.GetUpdateRunStatus() - updatingStageStatus := &updateRunStatus.StagesStatus[updatingStageIndex] updateRunRef := klog.KObj(updateRun) // Create the map of the toBeUpdatedBindings. toBeUpdatedBindingsMap := make(map[string]placementv1beta1.BindingObj, len(toBeUpdatedBindings)) @@ -92,7 +91,13 @@ func (r *Reconciler) stopUpdatingStage( clusterUpdatingCount++ - binding := toBeUpdatedBindingsMap[clusterStatus.ClusterName] + binding, exists := toBeUpdatedBindingsMap[clusterStatus.ClusterName] + if !exists || binding == nil { + missingBindingErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the binding for cluster `%s` in stage `%s` is not found in the toBeUpdatedBindings map during stopping", clusterStatus.ClusterName, updatingStageStatus.StageName)) + klog.ErrorS(missingBindingErr, "Cannot find the binding for the cluster in the updating stage during stopping", "cluster", clusterStatus.ClusterName, "stage", updatingStageStatus.StageName, "updateRun", updateRunRef) + clusterUpdateErrors = append(clusterUpdateErrors, fmt.Errorf("%w: %s", errStagedUpdatedAborted, missingBindingErr.Error())) + continue + } finished, updateErr := checkClusterUpdateResult(binding, clusterStatus, updatingStageStatus, updateRun) if updateErr != nil { clusterUpdateErrors = append(clusterUpdateErrors, updateErr) diff --git a/pkg/controllers/updaterun/stop_test.go b/pkg/controllers/updaterun/stop_test.go index b992e7cdf..f18d20de7 100644 --- a/pkg/controllers/updaterun/stop_test.go +++ b/pkg/controllers/updaterun/stop_test.go @@ -33,6 +33,230 @@ import ( "go.goms.io/fleet/pkg/utils/condition" ) +func TestStop(t *testing.T) { + tests := []struct { + name string + updateRun *placementv1beta1.ClusterStagedUpdateRun + updatingStageIndex int + toBeUpdatedBindings []placementv1beta1.BindingObj + toBeDeletedBindings []placementv1beta1.BindingObj + wantErr bool + wantFinished bool + wantStageSucceededCond *metav1.Condition + wantDeletionStageConditionsEmpty bool + }{ + { + name: "stop with errStagedUpdatedAborted marks stage as failed", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + ResourceSnapshotIndexUsed: "1", + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "test-stage", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + // DeletionStageStatus exists but has no conditions set yet. + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-to-delete", + }, + }, + }, + }, + }, + updatingStageIndex: 0, + // No bindings provided - this will trigger errStagedUpdatedAborted + // when the binding for cluster-1 is not found in the map. + toBeUpdatedBindings: nil, + toBeDeletedBindings: nil, + wantErr: true, + wantFinished: false, + // Verify the stage is marked as failed (this proves updatingStageStatus was set before defer). + wantStageSucceededCond: &metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.StageUpdatingFailedReason, + }, + // Verify DeletionStageStatus conditions are not incorrectly populated for updating stage errors. + wantDeletionStageConditionsEmpty: true, + }, + { + name: "stop delete stage with errStagedUpdatedAborted marks deletion stage as failed", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + // All updating stages have completed successfully. + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "stage-1", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-0", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + { + Type: string(placementv1beta1.ClusterUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingSucceededReason, + }, + }, + }, + }, + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.StageUpdatingSucceededReason, + }, + }, + }, + }, + DeletionStageStatus: &placementv1beta1.StageUpdatingStatus{ + StageName: "deletion", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + // updatingStageIndex >= len(StagesStatus) triggers the deletion stage code path. + updatingStageIndex: 1, + toBeUpdatedBindings: nil, + // Binding not deleting (no DeletionTimestamp) but cluster is marked as started. + // This triggers errStagedUpdatedAborted. + toBeDeletedBindings: []placementv1beta1.BindingObj{ + &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "binding-1", + // No DeletionTimestamp - this causes the error. + }, + Spec: placementv1beta1.ResourceBindingSpec{ + TargetCluster: "cluster-1", + }, + }, + }, + wantErr: true, + wantFinished: false, + wantStageSucceededCond: &metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionSucceeded), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + Reason: condition.StageUpdatingFailedReason, + }, // We check DeletionStageStatus separately for this case. + // Verify the deletion stage is marked as failed (this proves the defer handles deletion stage). + wantDeletionStageConditionsEmpty: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := runtime.NewScheme() + _ = placementv1beta1.AddToScheme(scheme) + objs := make([]client.Object, len(tt.toBeUpdatedBindings)) + for i := range tt.toBeUpdatedBindings { + objs[i] = tt.toBeUpdatedBindings[i] + } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(objs...).Build() + r := &Reconciler{ + Client: fakeClient, + } + + finished, _, gotErr := r.stop(tt.updateRun, tt.updatingStageIndex, tt.toBeUpdatedBindings, tt.toBeDeletedBindings) + + // Verify error expectation. + if (gotErr != nil) != tt.wantErr { + t.Fatalf("stop() error = %v, wantErr %v", gotErr, tt.wantErr) + } + + // Verify finished result. + if finished != tt.wantFinished { + t.Fatalf("stop() finished = %v, want %v", finished, tt.wantFinished) + } + + // Verify stage succeeded condition if expected. + if tt.wantStageSucceededCond != nil && tt.wantDeletionStageConditionsEmpty { + succeededCond := meta.FindStatusCondition( + tt.updateRun.Status.StagesStatus[tt.updatingStageIndex].Conditions, + string(placementv1beta1.StageUpdatingConditionSucceeded), + ) + if succeededCond == nil { + t.Fatalf("stop() expected StageUpdatingConditionSucceeded condition to be set, got nil") + } + if diff := cmp.Diff(*tt.wantStageSucceededCond, *succeededCond, cmpOptions...); diff != "" { + t.Errorf("stop() StageUpdatingConditionSucceeded mismatch (-want +got):\n%s", diff) + } + + // Verify DeletionStageStatus conditions are empty when expected. + if len(tt.updateRun.Status.DeletionStageStatus.Conditions) != 0 { + t.Errorf("stop() DeletionStageStatus.Conditions = %v, want empty", tt.updateRun.Status.DeletionStageStatus.Conditions) + } + } + + // Verify deletion stage succeeded condition if expected. + if tt.wantStageSucceededCond != nil && !tt.wantDeletionStageConditionsEmpty { + succeededCond := meta.FindStatusCondition( + tt.updateRun.Status.DeletionStageStatus.Conditions, + string(placementv1beta1.StageUpdatingConditionSucceeded), + ) + if succeededCond == nil { + t.Fatalf("stop() expected DeletionStageStatus StageUpdatingConditionSucceeded condition to be set, got nil") + } + if diff := cmp.Diff(*tt.wantStageSucceededCond, *succeededCond, cmpOptions...); diff != "" { + t.Errorf("stop() DeletionStageStatus StageUpdatingConditionSucceeded mismatch (-want +got):\n%s", diff) + } + } + }) + } +} + func TestStopUpdatingStage(t *testing.T) { tests := []struct { name string @@ -92,6 +316,49 @@ func TestStopUpdatingStage(t *testing.T) { Reason: condition.StageUpdatingStoppedReason, }, }, + { + name: "missing binding in map lookup during stopping - nil pointer guard", + updateRun: &placementv1beta1.ClusterStagedUpdateRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-run", + Generation: 1, + }, + Spec: placementv1beta1.UpdateRunSpec{ + PlacementName: "test-placement", + ResourceSnapshotIndex: "1", + }, + Status: placementv1beta1.UpdateRunStatus{ + StagesStatus: []placementv1beta1.StageUpdatingStatus{ + { + StageName: "test-stage", + Clusters: []placementv1beta1.ClusterUpdatingStatus{ + { + ClusterName: "cluster-1", + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterUpdatingConditionStarted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + Reason: condition.ClusterUpdatingStartedReason, + }, + }, + }, + }, + }, + }, + }, + }, + bindings: nil, // No bindings provided, so cluster-1 will not be found in the map. + wantErr: errors.New("the binding for cluster `cluster-1` in stage `test-stage` is not found in the toBeUpdatedBindings map during stopping"), + wantFinished: false, + wantWaitTime: 0, + wantProgressCond: metav1.Condition{ + Type: string(placementv1beta1.StageUpdatingConditionProgressing), + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + Reason: condition.StageUpdatingStoppingReason, + }, + }, { name: "binding synced, bound, rolloutStarted true, but binding has failed condition", updateRun: &placementv1beta1.ClusterStagedUpdateRun{ @@ -180,7 +447,8 @@ func TestStopUpdatingStage(t *testing.T) { } // Stop the stage. - finished, waitTime, gotErr := r.stopUpdatingStage(tt.updateRun, 0, tt.bindings) + updatingStageStatus := &tt.updateRun.Status.StagesStatus[0] + finished, waitTime, gotErr := r.stopUpdatingStage(tt.updateRun, updatingStageStatus, tt.bindings) // Verify error expectation. if (tt.wantErr != nil) != (gotErr != nil) { diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index b4477dfcd..7b6457ad9 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -268,6 +268,7 @@ func (w *Config) Start(ctx context.Context) error { klog.ErrorS(err, "unable to setup webhook configurations in apiserver") return err } + klog.V(2).InfoS("webhook configurations created successfully") return nil } diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index 86f784242..ab87a9fce 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -137,6 +137,8 @@ helm install cert-manager jetstack/cert-manager \ # Install the hub agent to the hub cluster helm install hub-agent ../../charts/hub-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set image.pullPolicy=Never \ --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ --set image.tag=$TAG \ @@ -157,7 +159,9 @@ helm install hub-agent ../../charts/hub-agent/ \ --set logFileMaxSize=100000 \ --set MaxConcurrentClusterPlacement=200 \ --set resourceSnapshotCreationMinimumInterval=$RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL \ - --set resourceChangesCollectionDuration=$RESOURCE_CHANGES_COLLECTION_DURATION + --set resourceChangesCollectionDuration=$RESOURCE_CHANGES_COLLECTION_DURATION \ + --wait \ + --timeout=2m # Download CRDs from Fleet networking repo export ENDPOINT_SLICE_EXPORT_CRD_URL=https://raw.githubusercontent.com/Azure/fleet-networking/v0.2.7/config/crd/bases/networking.fleet.azure.com_endpointsliceexports.yaml @@ -209,6 +213,8 @@ do kind export kubeconfig --name "${MEMBER_CLUSTERS[$i]}" if [ "$i" -lt $RESERVED_CLUSTER_COUNT ]; then helm install member-agent ../../charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_SERVER_URL \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$TAG \ @@ -232,6 +238,8 @@ do $( [ "$PROPERTY_PROVIDER" = "azure" ] && echo "-f azure_valid_config.yaml" ) else helm install member-agent ../../charts/member-agent/ \ + --namespace fleet-system \ + --create-namespace \ --set config.hubURL=$HUB_SERVER_URL \ --set image.repository=$REGISTRY/$MEMBER_AGENT_IMAGE \ --set image.tag=$TAG \ diff --git a/tools/fleet/README.md b/tools/fleet/README.md index b003ac7de..897978db3 100644 --- a/tools/fleet/README.md +++ b/tools/fleet/README.md @@ -60,17 +60,37 @@ CURRENT NAME CLUSTER AUTHINFO Here you can see that the context of the hub cluster is called `hub` under the `NAME` column. -### Approve a ClusterApprovalRequest +### Approve an Approval Request -Use the `approve` subcommand to approve ClusterApprovalRequest resources for staged update runs. This allows staged updates to proceed to the next stage by patching an "Approved" condition to the resource status. +Use the `approve` subcommand to approve approval request resources for staged update runs. This allows staged updates to proceed to the next stage by patching an "Approved" condition to the resource status. +**Supported kinds:** +| Kind | Alias | Scope | Namespace Flag | +|------|-------|-------|----------------| +| `clusterapprovalrequest` | `careq` | Cluster | Not allowed | +| `approvalrequest` | `areq` | Namespace | Required | + +**Cluster-scoped (ClusterApprovalRequest):** ```bash kubectl fleet approve clusterapprovalrequest --hubClusterContext --name +# Or using alias: +kubectl fleet approve careq --hubClusterContext --name +``` + +**Namespace-scoped (ApprovalRequest):** +```bash +kubectl fleet approve approvalrequest --hubClusterContext --name --namespace +# Or using alias: +kubectl fleet approve areq --hubClusterContext --name -n ``` Example: ```bash +# Approve a ClusterApprovalRequest kubectl fleet approve clusterapprovalrequest --hubClusterContext hub --name my-approval-request + +# Approve an ApprovalRequest in a specific namespace +kubectl fleet approve approvalrequest --hubClusterContext hub --name my-approval-request --namespace my-namespace ``` ### Drain a Member Cluster @@ -103,13 +123,17 @@ kubectl fleet uncordoncluster --hubClusterContext hub --clusterName member-clust ### approve -Approves ClusterApprovalRequest resources for staged update runs by: +Approves approval request resources for staged update runs by: -1. **Status Update**: Patches the ClusterApprovalRequest resource with an "Approved" condition +1. **Status Update**: Patches the approval request resource with an "Approved" condition 2. **Stage Progression**: Allows staged updates to proceed to the next stage automatically -The approve command currently supports the following resource kinds: -- `clusterapprovalrequest`: Approve a ClusterApprovalRequest for staged updates +The approve command supports the following resource kinds: + +| Kind | Alias | Scope | Description | +|------|-------|-------|-------------| +| `clusterapprovalrequest` | `careq` | Cluster | Approve a ClusterApprovalRequest (no namespace) | +| `approvalrequest` | `areq` | Namespace | Approve an ApprovalRequest (requires `--namespace`) | ### draincluster @@ -134,6 +158,7 @@ If the `cordon` taint is not present on the member cluster, the command will hav The `approve` subcommand uses the following flags: - `--hubClusterContext`: kubectl context for the hub cluster (required) - `--name`: name of the resource to approve (required) +- `--namespace`, `-n`: namespace of the resource to approve (required for `approvalrequest`, not allowed for `clusterapprovalrequest`) Both `draincluster` and `uncordoncluster` subcommands use the following flags: - `--hubClusterContext`: kubectl context for the hub cluster (required) @@ -163,6 +188,9 @@ kubectl fleet uncordoncluster --hubClusterContext production-hub --clusterName w # Approve a ClusterApprovalRequest for staged updates kubectl fleet approve clusterapprovalrequest --hubClusterContext hub --name update-approval-stage-1 +# Approve a ApprovalRequest for staged updates +kubectl fleet approve approvalrequest --hubClusterContext hub --name update-approval-stage-1 --namespace test-namespace + # Drain multiple clusters (run separately for each cluster) kubectl fleet draincluster --hubClusterContext hub --clusterName east-cluster kubectl fleet draincluster --hubClusterContext hub --clusterName west-cluster @@ -176,6 +204,7 @@ kubectl fleet uncordoncluster --hubClusterContext hub --clusterName west-cluster ### Verifying Approval Operation +#### ClusterApprovalRequest (cluster-scope) After running the approval command, verify that the corresponding clusterApprovalRequest has been approved: 1. Check that the clusterApprovalRequest has `APPROVED` set to true @@ -186,6 +215,17 @@ After running the approval command, verify that the corresponding clusterApprova ``` 2. Verify the updateRun is not blocked by the approval after-stage task +#### ApprovalRequest (namespace-scope) +After running the approval command, verify that the corresponding approvalRequest has been approved: + +1. Check that the clusterApprovalRequest has `APPROVED` set to true + ``` + kubectl get approvalrequest example-run-staging -n test-namspace + NAME UPDATE-RUN STAGE APPROVED AGE + example-run-staging example-run staging True 2m46s + ``` +2. Verify the updateRun is not blocked by the approval after-stage task + ### Verifying Drain Operation After running the draincluster command, verify that resources have been removed from the member cluster: diff --git a/tools/fleet/cmd/approve/approve.go b/tools/fleet/cmd/approve/approve.go index 4002dd630..7d216e5db 100644 --- a/tools/fleet/cmd/approve/approve.go +++ b/tools/fleet/cmd/approve/approve.go @@ -30,13 +30,63 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + fleetcmd "go.goms.io/fleet/tools/fleet/cmd" toolsutils "go.goms.io/fleet/tools/utils" ) +// approveKindConfig extends fleetcmd.KindConfig with approve-specific validation and handler. +type approveKindConfig struct { + fleetcmd.KindConfig + validate func(o *approveOptions) error + handler func(o *approveOptions, ctx context.Context) error +} + +var approveKindConfigs = []approveKindConfig{ + { + KindConfig: fleetcmd.KindConfig{ + Canonical: fleetcmd.KindClusterApprovalRequest, + Aliases: []string{fleetcmd.AliasClusterApprovalRequest}, + }, + validate: func(o *approveOptions) error { + if o.namespace != "" { + return fmt.Errorf("%s is cluster-scoped and does not accept a namespace", fleetcmd.KindClusterApprovalRequest) + } + return nil + }, + handler: (*approveOptions).approveClusterApprovalRequest, + }, + { + KindConfig: fleetcmd.KindConfig{ + Canonical: fleetcmd.KindApprovalRequest, + Aliases: []string{fleetcmd.AliasApprovalRequest}, + }, + validate: func(o *approveOptions) error { + if o.namespace == "" { + return fmt.Errorf("namespace is required for %s (use --namespace or -n flag)", fleetcmd.KindApprovalRequest) + } + return nil + }, + handler: (*approveOptions).approveApprovalRequest, + }, +} + +// approveKinds maps canonical kind names and aliases to their approveKindConfig. +var approveKinds = map[string]*approveKindConfig{} + +func init() { + for i := range approveKindConfigs { + cfg := &approveKindConfigs[i] + approveKinds[cfg.Canonical] = cfg + for _, a := range cfg.Aliases { + approveKinds[a] = cfg + } + } +} + type approveOptions struct { hubClusterContext string - kind string name string + namespace string hubClient client.Client } @@ -49,23 +99,33 @@ func NewCmdApprove() *cobra.Command { Short: "Approve a resource", Long: `Approve a resource by updating its status with an "Approved" condition. -This command updates the clusterApprovalRequest status with an "Approved" condition, +This command updates the approval request status with an "Approved" condition, allowing staged update runs to proceed to the next stage. -Currently supported kinds: - clusterapprovalrequest - Approve a ClusterApprovalRequest`, +Supported kinds: + clusterapprovalrequest (careq) - Approve a ClusterApprovalRequest (cluster-scoped) + approvalrequest (areq) - Approve an ApprovalRequest (namespace-scoped) + +For namespace-scoped resources (approvalrequest), you must also specify the --namespace flag.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { - o.kind = args[0] + cfg, err := fleetcmd.ResolveKind(args[0], approveKinds) + if err != nil { + return err + } + if err := o.validate(cfg); err != nil { + return err + } if err := o.setupClient(); err != nil { return err } - return o.run(cmd.Context()) + return o.run(cmd.Context(), cfg) }, } cmd.Flags().StringVar(&o.hubClusterContext, "hubClusterContext", "", "The name of the kubeconfig context to use for the hub cluster") cmd.Flags().StringVar(&o.name, "name", "", "The name of the resource to approve") + cmd.Flags().StringVarP(&o.namespace, "namespace", "n", "", "The namespace of the resource to approve (required for namespace-scoped resources)") // Mark required flags. _ = cmd.MarkFlagRequired("hubClusterContext") @@ -74,20 +134,20 @@ Currently supported kinds: return cmd } -func (o *approveOptions) run(ctx context.Context) error { - if o.kind == "" { - return fmt.Errorf("resource kind is required") - } +// validate checks that the options are valid. +func (o *approveOptions) validate(cfg *approveKindConfig) error { if o.name == "" { return fmt.Errorf("resource name is required") } + return cfg.validate(o) +} - // Validate that we only support clusterapprovalrequest for now. - if o.kind != "clusterapprovalrequest" { - return fmt.Errorf("unsupported resource kind %q, only 'clusterapprovalrequest' is supported", o.kind) - } +func (o *approveOptions) run(ctx context.Context, cfg *approveKindConfig) error { + return cfg.handler(o, ctx) +} - // Patch the ClusterApprovalRequest status with approved condition. +// approveClusterApprovalRequest approves a ClusterApprovalRequest (cluster-scoped). +func (o *approveOptions) approveClusterApprovalRequest(ctx context.Context) error { err := retry.RetryOnConflict(retry.DefaultRetry, func() error { var car placementv1beta1.ClusterApprovalRequest if err := o.hubClient.Get(ctx, types.NamespacedName{Name: o.name}, &car); err != nil { @@ -108,7 +168,6 @@ func (o *approveOptions) run(ctx context.Context) error { return o.hubClient.Status().Update(ctx, &car) }) - if err != nil { return fmt.Errorf("failed to approve ClusterApprovalRequest %q: %w", o.name, err) } @@ -117,6 +176,36 @@ func (o *approveOptions) run(ctx context.Context) error { return nil } +// approveApprovalRequest approves an ApprovalRequest (namespace-scoped). +func (o *approveOptions) approveApprovalRequest(ctx context.Context) error { + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + var ar placementv1beta1.ApprovalRequest + if err := o.hubClient.Get(ctx, types.NamespacedName{Name: o.name, Namespace: o.namespace}, &ar); err != nil { + return fmt.Errorf("failed to get ApprovalRequest %q in namespace %q: %w", o.name, o.namespace, err) + } + + // Add the Approved condition. + approvedCondition := metav1.Condition{ + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionTrue, + Reason: "ApprovalRequestApproved", + Message: "ApprovalRequest has been approved", + ObservedGeneration: ar.Generation, + } + + // Update or add the condition. + meta.SetStatusCondition(&ar.Status.Conditions, approvedCondition) + + return o.hubClient.Status().Update(ctx, &ar) + }) + if err != nil { + return fmt.Errorf("failed to approve ApprovalRequest %q in namespace %q: %w", o.name, o.namespace, err) + } + + log.Printf("ApprovalRequest %q in namespace %q approved successfully\n", o.name, o.namespace) + return nil +} + // setupClient creates and configures the Kubernetes client func (o *approveOptions) setupClient() error { scheme := runtime.NewScheme() diff --git a/tools/fleet/cmd/approve/approve_test.go b/tools/fleet/cmd/approve/approve_test.go index 28bc662ab..8c838e485 100644 --- a/tools/fleet/cmd/approve/approve_test.go +++ b/tools/fleet/cmd/approve/approve_test.go @@ -30,9 +30,111 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + fleetcmd "go.goms.io/fleet/tools/fleet/cmd" ) -func TestRun(t *testing.T) { +func TestValidate(t *testing.T) { + tests := []struct { + name string + kind string + opts approveOptions + wantErr bool + wantErrMsg string + }{ + { + name: "empty kind should fail in resolveKind", + kind: "", + opts: approveOptions{ + name: "test-name", + }, + wantErr: true, + wantErrMsg: "resource kind is required", + }, + { + name: "empty name should fail", + kind: fleetcmd.KindClusterApprovalRequest, + opts: approveOptions{ + name: "", + }, + wantErr: true, + wantErrMsg: "resource name is required", + }, + { + name: "unsupported kind should fail", + kind: "unsupported", + opts: approveOptions{ + name: "test-name", + }, + wantErr: true, + wantErrMsg: "unsupported resource kind", + }, + { + name: "clusterapprovalrequest without namespace is valid", + kind: fleetcmd.KindClusterApprovalRequest, + opts: approveOptions{ + name: "test-name", + }, + wantErr: false, + }, + { + name: "clusterapprovalrequest with namespace should fail", + kind: fleetcmd.KindClusterApprovalRequest, + opts: approveOptions{ + name: "test-name", + namespace: "some-namespace", + }, + wantErr: true, + wantErrMsg: "does not accept a namespace", + }, + { + name: "approvalrequest without namespace should fail", + kind: fleetcmd.KindApprovalRequest, + opts: approveOptions{ + name: "test-name", + }, + wantErr: true, + wantErrMsg: "namespace is required for", + }, + { + name: "approvalrequest with namespace is valid", + kind: fleetcmd.KindApprovalRequest, + opts: approveOptions{ + name: "test-name", + namespace: "test-namespace", + }, + wantErr: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cfg, err := fleetcmd.ResolveKind(tc.kind, approveKinds) + if err != nil { + if tc.wantErr && strings.Contains(err.Error(), tc.wantErrMsg) { + return + } + t.Errorf("ResolveKind(%q) = %v, want nil", tc.kind, err) + return + } + + err = tc.opts.validate(cfg) + + if tc.wantErr { + if err == nil { + t.Errorf("validate() = nil, want error") + return + } + if tc.wantErrMsg != "" && !strings.Contains(err.Error(), tc.wantErrMsg) { + t.Errorf("validate() error = %q, want error containing %q", err.Error(), tc.wantErrMsg) + } + } else if err != nil { + t.Errorf("validate() = %v, want nil", err) + } + }) + } +} + +func TestApproveClusterApprovalRequest(t *testing.T) { wantCondition := metav1.Condition{ Type: string(placementv1beta1.ApprovalRequestConditionApproved), Status: metav1.ConditionTrue, @@ -42,37 +144,14 @@ func TestRun(t *testing.T) { tests := []struct { name string - kind string requestName string existingClusterApprovalReq *placementv1beta1.ClusterApprovalRequest wantCondition *metav1.Condition wantErr bool wantErrMsg string }{ - { - name: "empty kind should fail", - kind: "", - requestName: "test-name", - wantErr: true, - wantErrMsg: "resource kind is required", - }, - { - name: "empty name should fail", - kind: "clusterapprovalrequest", - requestName: "", - wantErr: true, - wantErrMsg: "resource name is required", - }, - { - name: "unsupported kind should fail", - kind: "unsupported", - requestName: "test-name", - wantErr: true, - wantErrMsg: "unsupported resource kind \"unsupported\", only 'clusterapprovalrequest' is supported", - }, { name: "successfully approve ClusterApprovalRequest", - kind: "clusterapprovalrequest", requestName: "test-approval", existingClusterApprovalReq: &placementv1beta1.ClusterApprovalRequest{ ObjectMeta: metav1.ObjectMeta{ @@ -92,7 +171,6 @@ func TestRun(t *testing.T) { }, { name: "approve ClusterApprovalRequest with existing conditions", - kind: "clusterapprovalrequest", requestName: "test-approval-existing", existingClusterApprovalReq: &placementv1beta1.ClusterApprovalRequest{ ObjectMeta: metav1.ObjectMeta{ @@ -118,7 +196,6 @@ func TestRun(t *testing.T) { }, { name: "update existing Approved condition", - kind: "clusterapprovalrequest", requestName: "test-approval-update", existingClusterApprovalReq: &placementv1beta1.ClusterApprovalRequest{ ObjectMeta: metav1.ObjectMeta{ @@ -145,7 +222,6 @@ func TestRun(t *testing.T) { }, { name: "ClusterApprovalRequest not found", - kind: "clusterapprovalrequest", requestName: "non-existent-approval", existingClusterApprovalReq: nil, wantErr: true, @@ -167,25 +243,23 @@ func TestRun(t *testing.T) { WithStatusSubresource(&placementv1beta1.ClusterApprovalRequest{}). Build() - // Test the core approval logic directly o := &approveOptions{ - kind: tc.kind, name: tc.requestName, hubClient: fakeClient, } - err := o.run(context.Background()) + err := o.approveClusterApprovalRequest(context.Background()) if tc.wantErr { if err == nil { - t.Errorf("want error but got nil") + t.Errorf("approveClusterApprovalRequest() = nil, want error") return } if tc.wantErrMsg != "" && !strings.Contains(err.Error(), tc.wantErrMsg) { - t.Errorf("want error to contain %q, got %q", tc.wantErrMsg, err.Error()) + t.Errorf("approveClusterApprovalRequest() error = %q, want error containing %q", err.Error(), tc.wantErrMsg) } return } else if err != nil { - t.Errorf("unwanted error: %v", err) + t.Errorf("approveClusterApprovalRequest() = %v, want nil", err) return } @@ -207,6 +281,327 @@ func TestRun(t *testing.T) { } } +func TestApproveApprovalRequest(t *testing.T) { + wantCondition := metav1.Condition{ + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionTrue, + Reason: "ApprovalRequestApproved", + Message: "ApprovalRequest has been approved", + } + + tests := []struct { + name string + requestName string + namespace string + existingApprovalReq *placementv1beta1.ApprovalRequest + wantCondition *metav1.Condition + wantErr bool + wantErrMsg string + }{ + { + name: "successfully approve ApprovalRequest", + requestName: "test-approval", + namespace: "test-namespace", + existingApprovalReq: &placementv1beta1.ApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval", + Namespace: "test-namespace", + Generation: 1, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + Status: placementv1beta1.ApprovalRequestStatus{ + Conditions: []metav1.Condition{}, + }, + }, + wantCondition: &wantCondition, + wantErr: false, + }, + { + name: "approve ApprovalRequest with existing conditions", + requestName: "test-approval-existing", + namespace: "test-namespace", + existingApprovalReq: &placementv1beta1.ApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval-existing", + Namespace: "test-namespace", + Generation: 2, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + Status: placementv1beta1.ApprovalRequestStatus{ + Conditions: []metav1.Condition{ + { + Type: "SomeOtherCondition", + Status: metav1.ConditionTrue, + Reason: "SomeReason", + }, + }, + }, + }, + wantCondition: &wantCondition, + wantErr: false, + }, + { + name: "update existing Approved condition", + requestName: "test-approval-update", + namespace: "test-namespace", + existingApprovalReq: &placementv1beta1.ApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval-update", + Namespace: "test-namespace", + Generation: 3, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + Status: placementv1beta1.ApprovalRequestStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionFalse, + Reason: "OldReason", + Message: "Old message", + }, + }, + }, + }, + wantCondition: &wantCondition, + wantErr: false, + }, + { + name: "ApprovalRequest not found", + requestName: "non-existent-approval", + namespace: "test-namespace", + existingApprovalReq: nil, + wantErr: true, + wantErrMsg: "not found", + }, + { + name: "ApprovalRequest in wrong namespace not found", + requestName: "test-approval", + namespace: "wrong-namespace", + existingApprovalReq: &placementv1beta1.ApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval", + Namespace: "test-namespace", + Generation: 1, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + }, + wantErr: true, + wantErrMsg: "not found", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + scheme := setupScheme(t) + var objects []client.Object + if tc.existingApprovalReq != nil { + objects = append(objects, tc.existingApprovalReq) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + WithStatusSubresource(&placementv1beta1.ApprovalRequest{}). + Build() + + o := &approveOptions{ + name: tc.requestName, + namespace: tc.namespace, + hubClient: fakeClient, + } + err := o.approveApprovalRequest(context.Background()) + + if tc.wantErr { + if err == nil { + t.Errorf("approveApprovalRequest() = nil, want error") + return + } + if tc.wantErrMsg != "" && !strings.Contains(err.Error(), tc.wantErrMsg) { + t.Errorf("approveApprovalRequest() error = %q, want error containing %q", err.Error(), tc.wantErrMsg) + } + return + } else if err != nil { + t.Errorf("approveApprovalRequest() = %v, want nil", err) + return + } + + // Verify the ApprovalRequest was updated correctly. + var updatedAR placementv1beta1.ApprovalRequest + err = fakeClient.Get(context.Background(), client.ObjectKey{Name: tc.requestName, Namespace: tc.namespace}, &updatedAR) + if err != nil { + t.Errorf("failed to get updated ApprovalRequest: %v", err) + return + } + + // Check that the Approved condition exists and is correct. + approvedCondition := meta.FindStatusCondition(updatedAR.Status.Conditions, tc.wantCondition.Type) + if diff := cmp.Diff(tc.wantCondition, approvedCondition, + cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "ObservedGeneration")); diff != "" { + t.Errorf("condition mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestRun(t *testing.T) { + wantClusterCondition := metav1.Condition{ + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionTrue, + Reason: "ClusterApprovalRequestApproved", + Message: "ClusterApprovalRequest has been approved", + } + wantNamespacedCondition := metav1.Condition{ + Type: string(placementv1beta1.ApprovalRequestConditionApproved), + Status: metav1.ConditionTrue, + Reason: "ApprovalRequestApproved", + Message: "ApprovalRequest has been approved", + } + + tests := []struct { + name string + kind string + requestName string + namespace string + existingClusterApprovalReq *placementv1beta1.ClusterApprovalRequest + existingApprovalReq *placementv1beta1.ApprovalRequest + wantCondition *metav1.Condition + wantErr bool + wantErrMsg string + }{ + { + name: "run dispatches to ClusterApprovalRequest", + kind: fleetcmd.KindClusterApprovalRequest, + requestName: "test-approval", + existingClusterApprovalReq: &placementv1beta1.ClusterApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval", + Generation: 1, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + }, + wantCondition: &wantClusterCondition, + wantErr: false, + }, + { + name: "run dispatches to ApprovalRequest", + kind: fleetcmd.KindApprovalRequest, + requestName: "test-approval", + namespace: "test-namespace", + existingApprovalReq: &placementv1beta1.ApprovalRequest{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-approval", + Namespace: "test-namespace", + Generation: 1, + }, + Spec: placementv1beta1.ApprovalRequestSpec{ + TargetUpdateRun: "test-update-run", + TargetStage: "test-stage", + }, + }, + wantCondition: &wantNamespacedCondition, + wantErr: false, + }, + { + name: "unsupported kind returns error", + kind: "unsupportedkind", + requestName: "test-approval", + wantErr: true, + wantErrMsg: "unsupported resource kind", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + scheme := setupScheme(t) + var objects []client.Object + if tc.existingClusterApprovalReq != nil { + objects = append(objects, tc.existingClusterApprovalReq) + } + if tc.existingApprovalReq != nil { + objects = append(objects, tc.existingApprovalReq) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + WithStatusSubresource(&placementv1beta1.ClusterApprovalRequest{}, &placementv1beta1.ApprovalRequest{}). + Build() + + cfg, err := fleetcmd.ResolveKind(tc.kind, approveKinds) + if err != nil { + if tc.wantErr && strings.Contains(err.Error(), tc.wantErrMsg) { + return + } + t.Errorf("ResolveKind(%q) = %v, want nil", tc.kind, err) + return + } + + o := &approveOptions{ + name: tc.requestName, + namespace: tc.namespace, + hubClient: fakeClient, + } + err = o.run(context.Background(), cfg) + + if tc.wantErr { + if err == nil { + t.Errorf("run() = nil, want error") + return + } + if tc.wantErrMsg != "" && !strings.Contains(err.Error(), tc.wantErrMsg) { + t.Errorf("run() error = %q, want error containing %q", err.Error(), tc.wantErrMsg) + } + return + } else if err != nil { + t.Errorf("run() = %v, want nil", err) + return + } + + // Verify the resource was updated correctly based on kind. + if tc.kind == fleetcmd.KindClusterApprovalRequest { + var updatedCAR placementv1beta1.ClusterApprovalRequest + err = fakeClient.Get(context.Background(), client.ObjectKey{Name: tc.requestName}, &updatedCAR) + if err != nil { + t.Errorf("failed to get updated ClusterApprovalRequest: %v", err) + return + } + approvedCondition := meta.FindStatusCondition(updatedCAR.Status.Conditions, tc.wantCondition.Type) + if diff := cmp.Diff(tc.wantCondition, approvedCondition, + cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "ObservedGeneration")); diff != "" { + t.Errorf("condition mismatch (-want +got):\n%s", diff) + } + } else if tc.kind == fleetcmd.KindApprovalRequest { + var updatedAR placementv1beta1.ApprovalRequest + err = fakeClient.Get(context.Background(), client.ObjectKey{Name: tc.requestName, Namespace: tc.namespace}, &updatedAR) + if err != nil { + t.Errorf("failed to get updated ApprovalRequest: %v", err) + return + } + approvedCondition := meta.FindStatusCondition(updatedAR.Status.Conditions, tc.wantCondition.Type) + if diff := cmp.Diff(tc.wantCondition, approvedCondition, + cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "ObservedGeneration")); diff != "" { + t.Errorf("condition mismatch (-want +got):\n%s", diff) + } + } + }) + } +} + // setupScheme creates a scheme with the necessary APIs for testing. func setupScheme(t *testing.T) *runtime.Scheme { scheme := runtime.NewScheme() diff --git a/tools/fleet/cmd/types.go b/tools/fleet/cmd/types.go new file mode 100644 index 000000000..9fec27d59 --- /dev/null +++ b/tools/fleet/cmd/types.go @@ -0,0 +1,58 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cmd + +import ( + "fmt" + "strings" +) + +// Canonical kind names for approval requests. +const ( + KindClusterApprovalRequest = "clusterapprovalrequest" + KindApprovalRequest = "approvalrequest" +) + +// Aliases for approval request kinds. +const ( + AliasClusterApprovalRequest = "careq" + AliasApprovalRequest = "areq" +) + +// KindConfig defines the configuration for a resource kind. +// This allows commands to handle multiple resource kinds with different +// validation rules and handlers without using switch statements. +type KindConfig struct { + // Canonical is the canonical name of the kind (e.g., "clusterapprovalrequest"). + Canonical string + // Aliases are short aliases for the kind (e.g., "careq"). + Aliases []string +} + +// ResolveKind resolves a kind string (canonical or alias) to its value from the provided map. +// The lookup is case-insensitive. +func ResolveKind[T any](kind string, kinds map[string]*T) (*T, error) { + if kind == "" { + return nil, fmt.Errorf("resource kind is required") + } + lower := strings.ToLower(kind) + cfg, ok := kinds[lower] + if !ok { + return nil, fmt.Errorf("unsupported resource kind %q", kind) + } + return cfg, nil +}