From abc909cb2d998e1c9ae8e7a92a86f07697e44210 Mon Sep 17 00:00:00 2001 From: Erika Date: Thu, 23 Apr 2026 09:41:39 -0700 Subject: [PATCH] Add nbdime for diffing and merging notebooks --- README.md | 54 +++++++++++++++++++++++++++++++++++++++++++----- pyproject.toml | 3 +++ uv.lock | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5aa9c597a..424396005 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,68 @@ # data-analyses + Place for sharing quick reports, and works in progress This repository is for quick sharing of works in progress and simple analyses. For collaborative short-term tasks, create a new folder and work off a separate branch. For longer-term projects, consider making a new repository! + ## Using this Repo * Use [this link](https://docs.calitp.org/data-infra/analytics_tools/saving_code.html#onboarding-setup) to get started in JupyterHub, set up SSH, and start commiting to the repo! + +### JupyterHub Developers + +If you are developing in JupyterHub, follow the [JupyterHub setup docs](https://docs.calitp.org/data-infra/analytics_tools/jupyterhub.html). + + ### Contributing -#### Pre-commit -This repository uses pre-commit hooks to format code, including [Black](https://black.readthedocs.io/en/stable/index.html). This ensures baseline consistency in code formatting. +Follow these steps to start contributing: + +1. [Clone](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) this `data-analyses` repo. + +2. From the repo root (`data-analyses/`), run `make install_env` (runs `uv sync` + pre-commit setup) + +3. In JupyterHub, select the **"Pyproject Local"** kernel when opening a notebook + +> [!NOTE] +> If you run into the error `No such file or directory`, you may need to [install uv](https://docs.astral.sh/uv/getting-started/installation/) running `pip install uv`. 
+ +### uv + +This repository uses uv for package management. To learn more, see the [uv documentation](https://docs.astral.sh/uv/concepts/projects/dependencies/). -> [!IMPORTANT] -> Before contributing to this project, please install dependencies and pre-commit by running `make install_env` in the root of the repo. +Basic commands: -Once installed, pre-commit checks will run before you can make commits locally. If a pre-commit check fails, it will need to be addressed before you can make your commit. Many formatting issues are fixed automatically within the pre-commit actions, so check the changes made by pre-commit on failure -- they may have automatically addressed the issues that caused the failure, in which case you can simply re-add the files, re-attempt the commit, and the checks will then succeed. +* `uv sync` install missing packages, update existing ones, and remove unnecessary ones to ensure the environment matches the lockfile. +* `uv add <package>` include and install a new package to the main project. +* `uv add --dev <package>` include and install new packages/dependencies to the `dev` group. +* `uv add --group portfolio <package>` include and install new packages/dependencies to the `portfolio` group. +* `uv add --group test <package>` include and install new packages/dependencies used only for testing under the `test` group. +* `uv remove <package>` remove and uninstall packages/dependencies from the project. + +### nbdime + +[`nbdime`](https://github.com/jupyter/nbdime) provides command-line tools for diffing and merging notebooks. + +Basic commands: + +* `nbdiff` compare notebooks in a terminal-friendly way. +* `nbshow` present a single notebook in a terminal-friendly way. + +### Pre-commit + +This repository uses pre-commit hooks to format code, including [Black](https://black.readthedocs.io/en/stable/index.html). This ensures baseline consistency in code formatting. + +Pre-commit checks will run before you can make commits locally.
If a pre-commit check fails, it will need to be addressed before you can make your commit. +Many formatting issues are fixed automatically within the pre-commit actions, so check the changes made by pre-commit on failure -- they may have automatically addressed the issues that caused the failure, in which case you can simply re-add the files, re-attempt the commit, and the checks will then succeed. Installing pre-commit locally saves time dealing with formatting issues on pull requests. There is a [GitHub Action](./.github/workflows/lint.yml) that runs pre-commit on all files, not just changed ones, as part of our continuous integration. + ## Quick Links - Get Started in Data Analysis #### Data Analytics Documentation - Welcome @@ -32,7 +73,9 @@ https://docs.calitp.org/data-infra/analytics_welcome/overview.html https://docs.calitp.org/data-infra/analytics_tools/overview.html + ## Publishing Reports + [The sites folder](./portfolio/sites/) contains the YAML files that drive sites deployed to [https://analysis.calitp.org/](https://analysis.calitp.org/); the existing sites can be used as examples/templates for deploying additional sites. Also, the @@ -40,6 +83,7 @@ Data Services Documentation has a [specific chapter](https://docs.calitp.org/dat dedicated to various ways to publish data. ### Caveats (when using the portfolio site) + Jupyter Book/Sphinx do not play nicely with Markdown headers written out in `display()` calls. 
Therefore, [portfolio.py](./portfolio.py) uses a custom Papermill engine to template Markdown cells directly, following Python formatted-string diff --git a/pyproject.toml b/pyproject.toml index 981494465..a5501a86a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,9 @@ name = "cal-itp-data-analyses" version = "0.1.0" requires-python = ">=3.11.0, <3.12.0" +dependencies = [ + "nbdime>=4.0.4", +] [tool.uv.workspace] members = [ diff --git a/uv.lock b/uv.lock index 93544d1a6..3b799b4a7 100644 --- a/uv.lock +++ b/uv.lock @@ -349,6 +349,9 @@ wheels = [ name = "cal-itp-data-analyses" version = "0.1.0" source = { virtual = "." } +dependencies = [ + { name = "nbdime" }, +] [package.dev-dependencies] dev = [ @@ -385,6 +388,7 @@ test = [ ] [package.metadata] +requires-dist = [{ name = "nbdime", specifier = ">=4.0.4" }] [package.metadata.requires-dev] dev = [ @@ -981,6 +985,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/15/cf2a69ade4b194aa524ac75112d5caac37414b20a3a03e6865dfe0bd1539/geopy-2.4.1-py3-none-any.whl", hash = "sha256:ae8b4bc5c1131820f4d75fce9d4aaaca0c85189b3aa5d64c3dcaf5e3b7b882a7", size = 125437, upload-time = "2023-11-23T21:49:30.421Z" }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, +] + +[[package]] +name = "gitpython" +version = "3.1.49" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/63/210aaa302d6a0a78daa67c5c15bbac2cad361722841278b0209b6da20855/gitpython-3.1.49.tar.gz", hash = "sha256:42f9399c9eb33fc581014bedd76049dfbaf6375aa2a5754575966387280315e1", size = 219367, upload-time = "2026-04-29T00:31:20.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/6f/b842bfa6f21d6f87c57f9abf7194225e55279d96d869775e19e9f7236fc5/gitpython-3.1.49-py3-none-any.whl", hash = "sha256:024b0422d7f84d15cd794844e029ffebd4c5d42a7eb9b936b458697ef550a02c", size = 212190, upload-time = "2026-04-29T00:31:18.412Z" }, +] + [[package]] name = "google-api-core" version = "2.30.0" @@ -2029,6 +2057,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/4b/8d5f796a792f8a25f6925a96032f098789f448571eb92011df1ae59e8ea8/nbconvert-7.17.0-py3-none-any.whl", hash = "sha256:4f99a63b337b9a23504347afdab24a11faa7d86b405e5c8f9881cd313336d518", size = 261510, upload-time = "2026-01-29T16:37:46.322Z" }, ] +[[package]] +name = "nbdime" +version = "4.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama" }, + { name = "gitpython" }, + { name = "jinja2" }, + { name = "jupyter-server" }, + { name = "nbformat" }, + { name = "pygments" }, + { name = "requests" }, + { name = "tornado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0c/22/d1ae77115b96b2ff8df6bbc54f8651eeb02d5716cea9079cdf94eaf6bb8b/nbdime-4.0.4.tar.gz", hash = "sha256:8cd25ecfeeb5105d563237d7f64eb4748058fba9bba9ab3892a1ff61e177ce16", size = 9454602, upload-time = "2026-02-10T15:02:02.995Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/dd/75a852b276f29e131c33083cb57dfaf3996c94ac2e07b1c90cf5950b7505/nbdime-4.0.4-py3-none-any.whl", hash = "sha256:4cdfb628c6625fe2c6cada2fe24917fa461e542cfe66a54096081af3fb1eeec4", size = 5917507, upload-time = "2026-02-10T15:02:00.216Z" }, +] + 
[[package]] name = "nbformat" version = "5.10.4" @@ -3318,6 +3365,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "smmap" +version = "5.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1f/ea/49c993d6dfdd7338c9b1000a0f36817ed7ec84577ae2e52f890d1a4ff909/smmap-5.0.3.tar.gz", hash = "sha256:4d9debb8b99007ae47165abc08670bd74cb74b5227dda7f643eccc4e9eb5642c", size = 22506, upload-time = "2026-03-09T03:43:26.1Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f", size = 24390, upload-time = "2026-03-09T03:43:24.361Z" }, +] + [[package]] name = "sniffio" version = "1.3.1"