Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ jobs:
fail-fast: false
matrix:
include:
- {os: windows-latest, python: "3.11", dask-version: "2026.3.0", name: "min dask"}
- {os: windows-latest, python: "3.12", dask-version: "2026.3.0", name: "min dask"}
- {os: windows-latest, python: "3.14", dask-version: "latest"}
- {os: ubuntu-latest, python: "3.11", dask-version: "latest"}
- {os: ubuntu-latest, python: "3.12", dask-version: "latest"}
- {os: ubuntu-latest, python: "3.14", dask-version: "latest"}
- {os: macos-latest, python: "3.11", dask-version: "latest"}
- {os: macos-latest, python: "3.12", dask-version: "latest"}
- {os: macos-latest, python: "3.14", prerelease: "allow", name: "prerelease"}
env:
OS: ${{ matrix.os }}
Expand All @@ -41,7 +41,7 @@ jobs:
- name: Install dependencies
run: |
if [[ "${PRERELEASE}" == "allow" ]]; then
  sed -i '' 's/requires-python.*//' pyproject.toml # otherwise uv complains that anndata requires python>=3.12 and we only do >=3.11 😱
  sed -i '' 's/requires-python.*//' pyproject.toml # drop the cap so uv can resolve prerelease deps
  uv add git+https://github.com/scverse/anndata.git
  # The requirement must be quoted: unquoted, the shell parses `>` as an output
  # redirection, so uv would be asked for plain `pandas` and its stdout would be
  # written to a file named `=3.dev0` instead of the version constraint being applied.
  uv add "pandas>=3.dev0"
fi
Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ Convenience small datasets

.. autofunction:: blobs
.. autofunction:: blobs_annotating_element
.. autofunction:: cells
.. autofunction:: raccoon
```
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ maintainers = [
urls.Documentation = "https://spatialdata.scverse.org/en/latest"
urls.Source = "https://github.com/scverse/spatialdata.git"
urls.Home-page = "https://github.com/scverse/spatialdata.git"
requires-python = ">=3.11"
requires-python = ">=3.12"
dynamic= [
"version" # allow version to be set by git tags
]
Expand Down Expand Up @@ -45,6 +45,7 @@ dependencies = [
"spatial_image>=1.2.3",
"scikit-image",
"scipy!=1.17.0",
"scverse-misc[datasets]>=0.0.10",
"typing_extensions>=4.8.0",
"universal_pathlib>=0.2.6",
"xarray>=2024.10.0",
Expand Down Expand Up @@ -145,7 +146,7 @@ exclude = [

]
line-length = 120
target-version = "py311"
target-version = "py312"

[tool.ruff.lint]
ignore = [
Expand Down
33 changes: 32 additions & 1 deletion src/spatialdata/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
)
from spatialdata.transformations import Identity

__all__ = ["blobs", "raccoon"]
__all__ = ["blobs", "cells", "raccoon"]


def blobs(
Expand Down Expand Up @@ -79,6 +79,37 @@ def raccoon() -> SpatialData:
return RaccoonDataset().raccoon()


def cells(path: str | None = None) -> SpatialData:
    """
    Load the ``cells`` example dataset.

    The dataset is downloaded and returned as a :class:`~spatialdata.SpatialData` object.
    Downloads are hash-verified and cached, so calling this function again reuses the
    local copy rather than fetching the data a second time.

    Parameters
    ----------
    path
        Directory in which to cache the downloaded data. If `None`, the default OS cache
        location is used (:func:`pooch.os_cache` for ``"spatialdata"``).

    Returns
    -------
    SpatialData object with the cells dataset.
    """
    import importlib.resources
    from pathlib import Path

    import pooch
    from scverse_misc.datasets import fetch, parse_registry

    # Resolve where downloads are cached: the caller's directory, or the OS default.
    if path is None:
        cache_dir = Path(pooch.os_cache("spatialdata"))
    else:
        cache_dir = Path(path)

    # The registry ships inside the package; ``as_file`` yields a real filesystem path for it.
    registry_resource = importlib.resources.files("spatialdata") / "datasets.yaml"
    with importlib.resources.as_file(registry_resource) as registry_file:
        base_url, datasets = parse_registry(registry_file)

    cells_entry = datasets["cells"]
    return fetch(cells_entry, cache_dir, base_url=base_url)


class RaccoonDataset:
"""Raccoon dataset."""

Expand Down
15 changes: 15 additions & 0 deletions src/spatialdata/datasets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Registry of downloadable example datasets for ``spatialdata.datasets``.
#
# Parsed by ``scverse_misc.datasets.parse_registry`` and fetched (downloaded,
# hash-verified, cached and loaded) via ``scverse_misc.datasets.fetch``.
#
# type: spatialdata -> a .zip that extracts to a single .zarr store
base_url: https://exampledata.scverse.org/spatialdata/
datasets:
cells:
type: spatialdata
doc_header: Cells dataset as a SpatialData object.
files:
- name: cells.zip
s3_key: cells.zip
sha256: dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb
30 changes: 29 additions & 1 deletion tests/datasets/test_datasets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from __future__ import annotations

from spatialdata.datasets import blobs, raccoon
import importlib.resources

import pytest

from spatialdata import SpatialData
from spatialdata.datasets import blobs, cells, raccoon


def test_datasets() -> None:
Expand All @@ -26,3 +31,26 @@ def test_datasets() -> None:
assert sdata_raccoon.images["raccoon"].shape == (3, 768, 1024)
assert sdata_raccoon.labels["segmentation"].shape == (768, 1024)
_ = str(sdata_raccoon)


def test_cells_registry() -> None:
    """Check, without network access, that the shipped registry parses and exposes ``cells``."""
    from scverse_misc.datasets import parse_registry

    expected_base_url = "https://exampledata.scverse.org/spatialdata/"
    expected_sha256 = "dc9613cb9e16fd2cd8d83f3a9586eeda4af5ba8ba366f1066efb51305820c5fb"

    registry_resource = importlib.resources.files("spatialdata").joinpath("datasets.yaml")
    with importlib.resources.as_file(registry_resource) as registry_file:
        base_url, datasets = parse_registry(registry_file)

    assert base_url == expected_base_url

    cells_entry = datasets["cells"]
    assert cells_entry.type == "spatialdata"

    zip_file = cells_entry.file(name="cells.zip")
    assert zip_file.sha256 == expected_sha256
    assert zip_file.resolve_url(base_url) == "https://exampledata.scverse.org/spatialdata/cells.zip"


@pytest.mark.slow
def test_cells_download(tmp_path) -> None:
    """Fetch the ``cells`` dataset into a temporary cache directory and check its type.

    Downloads ~3 MB from the scverse example data bucket; opt out with `-m "not slow"`.
    """
    cache_dir = str(tmp_path)
    loaded = cells(path=cache_dir)
    assert isinstance(loaded, SpatialData)
Loading