diff --git a/.github/scripts/nb-clean-check.sh b/.github/scripts/nb-clean-check.sh
new file mode 100755
index 000000000..5e40b6b50
--- /dev/null
+++ b/.github/scripts/nb-clean-check.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Check that every notebook under tutorials/ and projects/ passes
+# `nb-clean check`.
+#
+# Appends "dirty=true" or "dirty=false" to $GITHUB_OUTPUT so workflow steps can
+# test `steps.<id>.outputs.dirty == 'true'`.
+# Exit status: 1 if any notebook is dirty, 0 otherwise.
+set -euo pipefail
+
+dirty=false
+
+# Find all .ipynb files in tutorials/ and projects/ (NUL-delimited so paths
+# containing whitespace are read intact).
+while IFS= read -r -d '' notebook; do
+  if ! nb-clean check "$notebook"; then
+    echo "DIRTY: $notebook"
+    dirty=true
+  fi
+done < <(find tutorials/ projects/ -name '*.ipynb' -type f -print0 2>/dev/null)
+
+# Export result for GitHub Actions. Fall back to /dev/null so the script also
+# runs locally, where GITHUB_OUTPUT is unset and `set -u` would abort.
+echo "dirty=$dirty" >> "${GITHUB_OUTPUT:-/dev/null}"
+
+if [ "$dirty" = true ]; then
+  echo "::error::Some notebooks are not clean. Run 'nb-clean clean' to fix them."
+  exit 1
+fi
+
+echo "All notebooks are clean."
diff --git a/.github/scripts/nb-clean-cleanup.sh b/.github/scripts/nb-clean-cleanup.sh
new file mode 100755
index 000000000..256a69718
--- /dev/null
+++ b/.github/scripts/nb-clean-cleanup.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Clean every notebook under tutorials/ and projects/ with `nb-clean clean`,
+# then commit the result and push it to the checked-out branch.
+set -euo pipefail
+
+# Clean all notebooks in tutorials/ and projects/
+find tutorials/ projects/ -name '*.ipynb' -type f -print0 | while IFS= read -r -d '' notebook; do
+  echo "Cleaning: $notebook"
+  nb-clean clean "$notebook"
+done
+
+# Configure git user for the cleanup commit
+git config user.name "github-actions[bot]"
+git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+# Stage the cleaned notebooks
+git add tutorials/ projects/
+
+# Nothing staged means every notebook was already clean. Checking explicitly
+# (instead of `git commit ... || echo`) keeps real commit failures fatal.
+if git diff --cached --quiet; then
+  echo "No changes to commit"
+  exit 0
+fi
+
+git commit -m "chore: clean notebooks with nb-clean"
+
+# Push the cleanup commit back to the PR branch
+git push
diff --git a/.github/workflows/nb-clean.yml b/.github/workflows/nb-clean.yml
new file mode 100644
index 000000000..e72fe503d
--- /dev/null
+++ b/.github/workflows/nb-clean.yml
@@ -0,0 +1,160 @@
+name: nb-clean
+
+on:
+  pull_request:
+    branches: [staging]
+    paths:
+      - 'tutorials/**/*.ipynb'
+      - 'projects/**/*.ipynb'
+  pull_request_target:
+    branches: [staging]
+    paths:
+      - 'tutorials/**/*.ipynb'
+      - 'projects/**/*.ipynb'
+
+# Include the event name and PR number in the group: the two triggers must not
+# cancel each other, and PRs from different forks can share a head branch name.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  nb-clean:
+    # Both triggers fire for every PR, so handle each PR exactly once:
+    # `pull_request` for branches of this repository, `pull_request_target`
+    # for forks.
+    if: >-
+      (github.event_name == 'pull_request' &&
+      github.event.pull_request.head.repo.full_name == github.repository) ||
+      (github.event_name == 'pull_request_target' &&
+      github.event.pull_request.head.repo.full_name != github.repository)
+    runs-on: ubuntu-latest
+    steps:
+      # Scripts are run from this checkout, not from the PR head. Under
+      # `pull_request_target` the default ref is the base branch, so code from
+      # a fork is never executed with the write-scoped token.
+      - name: Checkout workflow scripts
+        uses: actions/checkout@v6
+        with:
+          path: trusted
+          persist-credentials: false
+
+      - name: Checkout PR head branch
+        uses: actions/checkout@v6
+        with:
+          # `repository` is required for forks; otherwise the head branch
+          # name is resolved against the base repository.
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          ref: ${{ github.event.pull_request.head.ref }}
+          path: pr
+          fetch-depth: 0
+          # Keep credentials only for same-repo branches, which get a push.
+          persist-credentials: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+
+      - name: Install nb-clean
+        run: pip install nb-clean
+
+      - name: Detect PR source
+        id: source
+        # Pass event data through env instead of interpolating it into the script.
+        env:
+          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
+          BASE_REPO: ${{ github.repository }}
+        run: |
+          if [ "$HEAD_REPO" = "$BASE_REPO" ]; then
+            echo "source=main-repo" >> "$GITHUB_OUTPUT"
+          else
+            echo "source=fork" >> "$GITHUB_OUTPUT"
+          fi
+
+      # Exits non-zero when notebooks are dirty, which fails the job. The
+      # follow-up steps use `!cancelled()` so they still run after that failure.
+      - name: Check if notebooks are clean
+        id: check
+        working-directory: pr
+        run: bash "$GITHUB_WORKSPACE/trusted/.github/scripts/nb-clean-check.sh"
+
+      - name: Clean notebooks (main repo)
+        id: cleanup
+        if: ${{ !cancelled() && steps.source.outputs.source == 'main-repo' && steps.check.outputs.dirty == 'true' }}
+        working-directory: pr
+        run: bash "$GITHUB_WORKSPACE/trusted/.github/scripts/nb-clean-cleanup.sh"
+
+      - name: Comment after main repo cleanup
+        if: ${{ !cancelled() && steps.cleanup.outcome == 'success' }}
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body: |
+            Notebooks have been auto-cleaned with a cleanup commit above. For future PRs, run `uvx nb-clean clean tutorials/ projects/` locally before pushing.
+
+      # The cleanup script only runs for same-repo branches, so clean the
+      # working tree here to give create-pull-request something to commit.
+      - name: Clean notebooks in place (fork)
+        if: ${{ !cancelled() && steps.source.outputs.source == 'fork' && steps.check.outputs.dirty == 'true' }}
+        working-directory: pr
+        run: |
+          find tutorials/ projects/ -name '*.ipynb' -type f -exec nb-clean clean {} \;
+
+      # NOTE(review): pushing to a fork may require a PAT rather than
+      # GITHUB_TOKEN - confirm against the action's documentation. A failure
+      # here is tolerated and falls through to the fallback comment below.
+      - name: Create cleanup PR (fork)
+        id: create-pr
+        if: ${{ !cancelled() && steps.source.outputs.source == 'fork' && steps.check.outputs.dirty == 'true' }}
+        continue-on-error: true
+        uses: peter-evans/create-pull-request@v8
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          path: pr
+          title: "chore: clean notebooks with nb-clean"
+          body: |
+            Auto-generated cleanup of Jupyter notebooks using nb-clean.
+
+            This PR applies `nb-clean clean` to all notebooks in this PR's branch.
+            Please review and merge the changes to accept the cleaned notebooks.
+
+            **To apply locally instead:**
+            ```bash
+            pip install nb-clean
+            nb-clean clean tutorials/ projects/
+            ```
+          commit-message: "chore: clean notebooks with nb-clean"
+          push-to-fork: ${{ github.event.pull_request.head.repo.full_name }}
+
+      - name: Comment after fork cleanup PR (if created)
+        if: ${{ !cancelled() && steps.source.outputs.source == 'fork' && steps.check.outputs.dirty == 'true' && steps.create-pr.outputs.pull-request-number != '' }}
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body: |
+            A cleanup PR has been created: #${{ steps.create-pr.outputs.pull-request-number }}.
+            Please review and accept the changes, then merge to accept the cleaned notebooks.
+
+      - name: Comment with cleanup instructions (fallback)
+        if: ${{ !cancelled() && steps.source.outputs.source == 'fork' && steps.check.outputs.dirty == 'true' && steps.create-pr.outcome == 'failure' }}
+        uses: peter-evans/create-or-update-comment@v4
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          body: |
+            **Notebook Cleanup Required**
+
+            I could not automatically push cleanup changes to your fork. Please clean your notebooks locally:
+
+            ```bash
+            pip install nb-clean
+            nb-clean clean tutorials/ projects/
+            ```
+
+            Then push your cleaned changes to the PR branch.
+
+            Alternatively, enable **"Allow edits from maintainers"** in your fork settings, or check if there's a cleanup PR already created for this PR.
diff --git a/README.md b/README.md index 36080fd02..b9a7eb43d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,23 @@ Derivative works may use the license that is more appropriate to the relevant co [bsd-3]: https://opensource.org/licenses/BSD-3-Clause [bsd-3-shield]: https://img.shields.io/badge/License-BSD_3--Clause-blue.svg +## Contributing Notebooks + +When contributing Jupyter notebooks, please ensure they are clean for version control: + +```bash +pip install nb-clean +nb-clean clean tutorials/ projects/ +``` + +Or use the one-liner: + +```bash +uvx nb-clean clean tutorials/ projects/ +``` + +Notebooks will be automatically cleaned when you open a PR. If the cleanup fails to push to your fork, you'll be asked to run the cleanup locally. + ## Contributors ✨ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): diff --git a/docs/plans/2026-06-10-nb-clean-design.md b/docs/plans/2026-06-10-nb-clean-design.md new file mode 100644 index 000000000..2f808b2ab --- /dev/null +++ b/docs/plans/2026-06-10-nb-clean-design.md @@ -0,0 +1,95 @@ +# nb-clean GitHub Action Design + +**Date:** 2026-06-10 +**Status:** Approved + +## Overview + +A GitHub Action workflow that automatically cleans Jupyter notebooks when pull requests are opened against the `staging` branch. The workflow uses `nb-clean` to strip cell execution counts, metadata, and outputs from notebooks in `tutorials/` and `projects/`. + +The workflow handles two cases differently based on whether the PR comes from a branch on the main repo or from a fork. 
+ +## Trigger + +- **Workflow file:** `.github/workflows/nb-clean.yml` +- **Trigger events:** `pull_request` and `pull_request_target` on PRs targeting the `staging` branch +- **Path filter:** `tutorials/**/*.ipynb`, `projects/**/*.ipynb` +- **Skip condition:** If no `.ipynb` files changed in the PR, the workflow skips entirely +- **Skip condition:** If all notebooks in `tutorials/` and `projects/` are already clean, the workflow passes silently + +## Main Repo Branch Flow + +When the PR source branch is on the main repo (`github.event.pull_request.head.repo.full_name == github.repository`): + +1. Check out the PR head branch +2. Run `nb-clean check` on all `.ipynb` files in `tutorials/` and `projects/` +3. If dirty, run `nb-clean clean` to fix in place +4. Push a cleanup commit back to the same branch +5. Fail the CI check and comment on the PR with brief instructions + +Instructions comment: +> Notebooks have been auto-cleaned with a cleanup commit above. For future PRs, run `uvx nb-clean clean tutorials/ projects/` locally before pushing. + +## Fork Branch Flow + +When the PR comes from a fork: + +1. Check out the PR head branch (from the fork) +2. Run `nb-clean check` on all `.ipynb` files in `tutorials/` and `projects/` +3. If dirty, run `nb-clean clean` to fix in place +4. Use `peter-evans/create-pull-request` action with `push-to-fork` option to push a cleanup branch to the fork and create a PR +5. Comment on the original PR with a link to the cleanup PR + +**Fallback:** If the action cannot push to the fork (e.g., "Allow edits from maintainers" is disabled), post a comment on the original PR with manual cleanup instructions. 
+ +## Error Handling + +- If `nb-clean` is not installed or fails, the workflow fails gracefully with a clear error message +- If git push fails, post a comment explaining the issue and providing manual instructions +- If the PR is already clean, the workflow passes silently +- If no `.ipynb` files changed, the workflow is skipped + +## Edge Cases + +- **Multiple commits:** Workflow re-runs on each push, re-applies cleanup +- **Rebase conflicts:** If a contributor rebases after the cleanup commit, `git push` will fail and the workflow will comment with instructions +- **Large PRs:** `nb-clean check` runs against all `.ipynb` files in `tutorials/` and `projects/`; bounded by GitHub's 6-hour job limit + +## Architecture + +### Files + +``` +.github/ + workflows/ + nb-clean.yml # Main workflow file + scripts/ + nb-clean-check.sh # Bash script: runs nb-clean check, returns exit code + nb-clean-cleanup.sh # Bash script: runs nb-clean clean, commits cleanup +``` + +### Workflow Structure + +The workflow uses both `pull_request` and `pull_request_target` triggers. `pull_request` handles the main repo case; `pull_request_target` handles fork PRs, where the `pull_request` token is read-only, so the workflow can still comment on the PR and attempt a cleanup PR. + +Permissions: +- `contents: write` — for pushing cleanup commits +- `pull-requests: write` — for creating cleanup PRs + +### Key Steps + +1. Check out PR head branch +2. Set up Python 3.12 +3. Install `nb-clean` via pip +4. Detect PR source (main repo vs fork) using `github.event.pull_request.head.repo.full_name` +5. Run `nb-clean check` on all notebooks in `tutorials/` and `projects/` +6. 
Branch based on source: + - Main repo: run cleanup script, push commit + - Fork: use `peter-evans/create-pull-request@v8` with `push-to-fork` + +## Tools & Dependencies + +- **nb-clean** — Python package for cleaning Jupyter notebooks (`pip install nb-clean`) +- **peter-evans/create-pull-request@v8** — GitHub Action for creating/updating PRs, with `push-to-fork` support +- **actions/checkout@v6** — Check out the PR head branch +- **actions/setup-python@v6** — Set up Python 3.12