diff --git a/.cursor/rules/ce-onprem-public-dispatch.mdc b/.cursor/rules/ce-onprem-public-dispatch.mdc new file mode 100644 index 00000000..d183a594 --- /dev/null +++ b/.cursor/rules/ce-onprem-public-dispatch.mdc @@ -0,0 +1,25 @@ +--- +description: Maintain CE public-to-private on-prem deployment dispatch contract +globs: .github/workflows/*.y*ml +alwaysApply: false +--- + +# CE On-Prem Public Dispatch Contract + +- Scope: this rule applies when editing CE release and on-prem dispatch workflows. +- Treat this path as a contract chain: `release.yml` -> `deploy_ce_onprem_public.yaml` -> `ce-deployment` `repository_dispatch` -> Jenkins `mlrunce_deploy_onprem_v2/dev`. + +## Contract Rules + +- Keep the dispatch `event_type` as `deploy-ce-onprem` unless both repos are updated together. +- Keep payload keys stable (`version`, `system_id`, `run_naipi`, `source_repo`, `triggered_by`) or update the private workflow extractor in the same change. +- Preserve release tag expectations (`mlrun-ce-`) and the chart-version handoff from `release.yml`. +- Preserve the repository resolution guard (`owner/repo`) for `DEPLOYMENT_REPO`. +- Keep run tracking robust: dispatch-time correlation is required; if changing polling logic, guard against selecting an unrelated `repository_dispatch` run. + +## Review Checklist Before Merging + +- Validate that new/renamed payload keys are consumed in `ce-deployment/.github/workflows/deploy_ce_onprem.yaml`. +- Validate that the `version` transformation remains consistent with chart release tags. +- Validate that failures in the private workflow propagate to, and fail, the public workflow (do not silently pass). +- Validate that timeout and retry values still cover long Jenkins runs (up to ~6h). 
diff --git a/.github/workflows/deploy_ce_onprem_public.yaml b/.github/workflows/deploy_ce_onprem_public.yaml index 472f593a..889a76d1 100644 --- a/.github/workflows/deploy_ce_onprem_public.yaml +++ b/.github/workflows/deploy_ce_onprem_public.yaml @@ -23,6 +23,7 @@ jobs: trigger-deployment: name: Trigger Deployment in Private Repo runs-on: ubuntu-latest + timeout-minutes: 420 # 360-min Jenkins job + buffer steps: - name: Resolve target repository id: repo-info @@ -49,10 +50,14 @@ jobs: private-key: ${{ secrets.GH_APP_PRIVATE_KEY }} owner: ${{ steps.repo-info.outputs.owner }} repositories: ${{ steps.repo-info.outputs.repo }} + permission-contents: write + permission-actions: read - name: Send Repository Dispatch to Private Deployment Repo + id: dispatch run: | DEPLOYMENT_REPO="${{ steps.repo-info.outputs.full_name }}" + DISPATCH_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") curl -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: token ${{ steps.app-token.outputs.token }}" \ @@ -70,5 +75,75 @@ jobs: triggered_by: "${{ github.actor }}" } }')" - + + echo "dispatch_time=$DISPATCH_TIME" >> $GITHUB_OUTPUT echo "Deployment triggered in private repository" + + - name: Wait for private deployment workflow + env: + APP_ID: ${{ secrets.GH_APP_ID }} + APP_PRIVATE_KEY: ${{ secrets.GH_APP_PRIVATE_KEY }} + INSTALLATION_ID: ${{ steps.app-token.outputs.installation-id }} + REPO: ${{ steps.repo-info.outputs.full_name }} + DISPATCH_TIME: ${{ steps.dispatch.outputs.dispatch_time }} + run: | + set -euo pipefail + + # Mint a short-lived installation token from the App JWT. 
+          # mint_token: print a fresh GitHub App installation access token on stdout.
+          # Reads APP_ID, APP_PRIVATE_KEY and INSTALLATION_ID from the step environment.
+          # Returns non-zero when the API call fails or the response carries no token.
+          mint_token() {
+            local now iat exp header payload unsigned sig jwt
+            # Backdate iat by 60s and expire 540s ahead of the runner clock.
+            now=$(date +%s)
+            iat=$((now - 60))
+            exp=$((now + 540))
+            # base64 with the URL-safe alphabet and padding stripped (JWT segment form).
+            b64() { openssl base64 -e -A | tr '+/' '-_' | tr -d '='; }
+            header=$(printf '{"alg":"RS256","typ":"JWT"}' | b64)
+            payload=$(printf '{"iat":%d,"exp":%d,"iss":"%s"}' "$iat" "$exp" "$APP_ID" | b64)
+            unsigned="${header}.${payload}"
+            # The private key is fed through process substitution, not a temp file.
+            sig=$(printf '%s' "$unsigned" \
+              | openssl dgst -sha256 -sign <(printf '%s' "$APP_PRIVATE_KEY") -binary | b64)
+            jwt="${unsigned}.${sig}"
+            # jq -e exits non-zero on a null/missing token instead of printing "null".
+            curl -sf -X POST \
+              -H "Authorization: Bearer $jwt" \
+              -H "Accept: application/vnd.github+json" \
+              "https://api.github.com/app/installations/${INSTALLATION_ID}/access_tokens" \
+              | jq -er '.token'
+          }
+
+          TOKEN=$(mint_token)
+
+          # Find the run created by our dispatch. The run can take a while to show
+          # up (eventual consistency), so retry. A transient API error is treated as
+          # "not found yet" instead of aborting the whole step under `set -e`.
+          RUN_ID=""
+          for attempt in {1..10}; do
+            # `created=%3E%3D...` is the percent-encoded form of `created=>=...`.
+            # Select the EARLIEST run created at or after DISPATCH_TIME: a later,
+            # unrelated repository_dispatch must not displace the run we triggered.
+            RUN_ID=$(curl -sf \
+              -H "Authorization: token $TOKEN" \
+              -H "Accept: application/vnd.github+json" \
+              "https://api.github.com/repos/${REPO}/actions/runs?event=repository_dispatch&created=%3E%3D${DISPATCH_TIME}&per_page=100" \
+              | jq -r '.workflow_runs | sort_by(.created_at) | first | .id // empty') \
+              || RUN_ID=""
+            if [ -n "$RUN_ID" ]; then
+              break
+            fi
+            echo "Waiting for workflow run to appear (attempt $attempt)..."
+            sleep 30
+          done
+
+          if [ -z "$RUN_ID" ]; then
+            echo "::error::Could not find a triggered workflow run after dispatch."
+            exit 1
+          fi
+          echo "Tracking run ${RUN_ID}"
+
+          # Poll until complete, re-minting the token each loop so it never expires.
+          # Wait for run RUN_ID to reach status "completed", then fail this step
+          # unless its conclusion is "success".
+          # Transient token/API failures are tolerated up to MAX_POLL_FAILURES polls
+          # in a row, so a single network blip cannot fail an hours-long wait.
+          MAX_POLL_FAILURES=5
+          poll_failures=0
+          STATUS=""
+          CONCLUSION=""
+          while true; do
+            # Commands tested by `if` are exempt from `set -e`, so a failed
+            # mint_token or curl falls through to the retry branch below.
+            if TOKEN=$(mint_token) \
+              && RUN=$(curl -sf \
+                -H "Authorization: token $TOKEN" \
+                -H "Accept: application/vnd.github+json" \
+                "https://api.github.com/repos/${REPO}/actions/runs/${RUN_ID}"); then
+              poll_failures=0
+              STATUS=$(jq -r '.status' <<<"$RUN")
+              CONCLUSION=$(jq -r '.conclusion // empty' <<<"$RUN")
+              echo " status=$STATUS conclusion=${CONCLUSION:-pending}"
+              if [ "$STATUS" = "completed" ]; then
+                break
+              fi
+            else
+              poll_failures=$((poll_failures + 1))
+              echo "::warning::Could not read run ${RUN_ID} (failure ${poll_failures}/${MAX_POLL_FAILURES}); retrying."
+              if [ "$poll_failures" -ge "$MAX_POLL_FAILURES" ]; then
+                echo "::error::Giving up after ${MAX_POLL_FAILURES} consecutive polling failures."
+                exit 1
+              fi
+            fi
+            sleep 60
+          done
+
+          # Any conclusion other than "success" (failure, cancelled, timed_out, ...)
+          # must fail the public workflow.
+          if [ "$CONCLUSION" != "success" ]; then
+            echo "::error::Private deployment workflow concluded: $CONCLUSION"
+            exit 1
+          fi
+          echo "Deployment workflow succeeded."