diff --git a/.gitignore b/.gitignore
index 63b1c25..003c2d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,6 @@ __pycache__/
 # docs
 /docs/generated/
 /docs/_build/
+
+# lockfiles (library: not committed)
+/uv.lock
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 582e6bb..48fe5f7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,3 +49,4 @@ repos:
       - sphinx
       - sphinx-autodoc-typehints
       - sphinxcontrib-katex
+      - types-PyYAML
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8048ef..917ed06 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,13 @@ and this project adheres to [Semantic Versioning][].
 ### Added
 
 - A Sphinx extension to take care of documentation. This moves docstring processing from import time to documentation building time.
+- A reusable `datasets` subpackage (behind the `datasets` extra): typed `DatasetEntry`/
+  `FileEntry` + `parse_registry` (YAML), a thin pooch-based `fetch` (SHA-256 verification,
+  retries, archive processors), and a pluggable `type -> loader` registry
+  (`register_loader`) so packages can share dataset-download infrastructure. Ships built-in
+  `anndata` and `spatialdata` loaders (the latter behind the `spatialdata` extra); other
+  types are consumer-registered.
+- `anndata` is now a core dependency.
 
 ### Changed
 
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
index 7b4a0cf..ab448ca 100644
--- a/docs/_templates/autosummary/class.rst
+++ b/docs/_templates/autosummary/class.rst
@@ -7,55 +7,49 @@
 .. autoclass:: {{ objname }}
 
 {% block attributes %}
-{% if attributes %}
+{% for item in attributes %}
+{% if loop.length != 1 %}
+{% if loop.first %}
 Attributes table
 ~~~~~~~~~~~~~~~~
 
 .. autosummary::
-{% for item in attributes %}
+{% endif %}
     ~{{ name }}.{{ item }}
-{%- endfor %}
 {% endif %}
+{%- endfor %}
 {% endblock %}
 
 {% block methods %}
-{% if methods %}
+{% for item in all_methods if item == '__call__' or not item.startswith('__') %}
+{% if loop.length != 1 %}
+{% if loop.first %}
 Methods table
 ~~~~~~~~~~~~~
 
 .. autosummary::
-{% for item in methods %}
-    {%- if item != '__init__' %}
+{% endif %}
     ~{{ name }}.{{ item }}
-    {%- endif -%}
-{%- endfor %}
 {% endif %}
+{%- endfor %}
 {% endblock %}
 
 {% block attributes_documentation %}
-{% if attributes %}
+{% for item in attributes %}
+{% if loop.first %}
 Attributes
 ~~~~~~~~~~
-
-{% for item in attributes %}
-
+{% endif %}
 .. autoattribute:: {{ [objname, item] | join(".") }}
 {%- endfor %}
-
-{% endif %}
 {% endblock %}
 
 {% block methods_documentation %}
-{% if methods %}
+{% for item in all_methods if item == '__call__' or not item.startswith('__') %}
+{% if loop.first %}
 Methods
 ~~~~~~~
-
-{% for item in methods %}
-{%- if item != '__init__' %}
-
+{% endif %}
 .. automethod:: {{ [objname, item] | join(".") }}
-{%- endif -%}
 {%- endfor %}
-
-{% endif %}
 {% endblock %}
diff --git a/docs/api.md b/docs/api.md
index 5717c4b..c23078c 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -1,10 +1,11 @@
 # API
 
 ```{eval-rst}
-.. currentmodule:: scverse_misc
+.. module:: scverse_misc
 .. toctree::
 ```
 
+(extensions)=
 ## Extensions
 
 ```{eval-rst}
@@ -13,7 +14,9 @@
 
     make_register_namespace_decorator
 ```
+
 Types used by the former:
+
 ```{eval-rst}
 .. autosummary::
     :toctree: generated
@@ -23,7 +26,9 @@ Types used by the former:
 
 *Examples:* {ref}`example-extension-namespaces`
 
+(deprecations)=
 ## Deprecations
+
 ```{eval-rst}
 .. autosummary::
    :toctree: generated
@@ -35,6 +40,7 @@ Types used by the former:
 
 *Examples:* {ref}`example-deprecating-a-function`, {ref}`example-deprecating-a-function-argument`, {ref}`example-settings-class`
 
+(settings)=
 ## Settings
 
 ```{eval-rst}
@@ -43,9 +49,28 @@ Types used by the former:
 
    api/settings
 
-+---------------------------+----------------------------------+
-| :class:`Settings` ()      | Base class for package settings. |
-+---------------------------+----------------------------------+
+.. autosummary::
+   :signatures: short
+
+   Settings
 ```
 
 *Examples:* {ref}`example-settings-class`
+
+(datasets)=
+## Datasets (`scverse_misc.datasets`)
+
+```{eval-rst}
+.. automodule:: scverse_misc.datasets
+.. autosummary::
+    :toctree: generated
+
+    DatasetEntry
+    FileEntry
+    parse_registry
+    fetch
+    register_loader
+    available_loaders
+    Loader
+    DownloadCB
+```
diff --git a/docs/conf.py b/docs/conf.py
index 99d609f..22717f4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -13,6 +13,7 @@
 
 from sphinxcontrib import katex
 
+
 HERE = Path(__file__).parent
 sys.path.insert(0, str(HERE / "extensions"))
 sys.path.insert(0, str(HERE / "sphinx_ext_examples"))
@@ -104,6 +105,7 @@
     "scipy": ("https://docs.scipy.org/doc/scipy", None),
     "pandas": ("https://pandas.pydata.org/docs/", None),
     "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
+    "pooch": ("https://www.fatiando.org/pooch/latest/", None),
     "pydantic": ("https://pydantic.dev/docs/validation/", None),
 }
 
@@ -137,5 +139,5 @@
 nitpick_ignore: list[tuple[str, str]] = [
     # If building the documentation fails because of a missing link that is outside your control,
     # you can add an exception to this list.
-    #     ("py:class", "igraph.Graph"),
+    ("py:class", "scverse_misc._deprecated.CallableWithDeprecatedArg"),
 ]
diff --git a/pyproject.toml b/pyproject.toml
index 424561f..0cf50eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,11 +22,14 @@ classifiers = [
 ]
 dynamic = [ "version" ]
 dependencies = [
+  "anndata",
   # for debug logging (referenced from the issue template)
   "session-info2",
   "typing-extensions; python_version<'3.13'",
 ]
+optional-dependencies.datasets = [ "pooch", "pyyaml", "tqdm" ]
 optional-dependencies.settings = [ "pydantic-settings", "python-dotenv" ]
+optional-dependencies.spatialdata = [ "spatialdata" ]
 optional-dependencies.sphinx = [ "pydocstring-rs>=0.1.13", "sphinx>=9" ]
 # https://docs.pypi.org/project_metadata/#project-urls
 urls.Documentation = "https://scverse-misc.readthedocs.io/"
@@ -38,13 +41,20 @@ dev = [
   "pre-commit",
   "twine>=4.0.2",
 ]
-test = [ "coverage>=7.10", "numpy", "pytest", "scverse-misc[settings,sphinx]", "sphinx", "sphinx-autodoc-typehints" ]
+test = [
+  "coverage>=7.10",
+  "numpy",
+  "pytest",
+  "scverse-misc[datasets,settings,sphinx]",
+  "sphinx",
+  "sphinx-autodoc-typehints"
+]
 doc = [
   "ipykernel",
   "ipython",
   "myst-nb>=1.1",
   "pandas",
-  "scverse-misc[settings,sphinx]",
+  "scverse-misc[datasets,settings,sphinx]",
   "sphinx>=8.1",
   "sphinx-autodoc-typehints",
   "sphinx-book-theme>=1",
diff --git a/src/scverse_misc/datasets/__init__.py b/src/scverse_misc/datasets/__init__.py
new file mode 100644
index 0000000..f22cfb7
--- /dev/null
+++ b/src/scverse_misc/datasets/__init__.py
@@ -0,0 +1,25 @@
+"""Reusable, declarative dataset download for scverse packages.
+
+Parse a YAML registry into typed :class:`DatasetEntry` objects, then download and load
+one with :func:`fetch`. Dataset ``type`` strings are dispatched against a pluggable loader
+registry (:func:`register_loader`); ``anndata`` and ``spatialdata`` loaders ship built in.
+
+Requires the ``datasets`` extra (``pip install scverse-misc[datasets]``); the built-in
+``spatialdata`` loader additionally needs the ``spatialdata`` extra.
+"""
+
+from __future__ import annotations
+
+from ._fetcher import DownloadCB, Loader, available_loaders, fetch, register_loader
+from ._registry import DatasetEntry, FileEntry, parse_registry
+
+__all__ = [
+    "FileEntry",
+    "DatasetEntry",
+    "parse_registry",
+    "fetch",
+    "register_loader",
+    "available_loaders",
+    "Loader",
+    "DownloadCB",
+]
diff --git a/src/scverse_misc/datasets/_fetcher.py b/src/scverse_misc/datasets/_fetcher.py
new file mode 100644
index 0000000..87ee0fb
--- /dev/null
+++ b/src/scverse_misc/datasets/_fetcher.py
@@ -0,0 +1,132 @@
+"""Download + load a dataset: a thin ``fetch`` over pooch + a pluggable ``type -> loader`` registry.
+
+A loader is a callable ``(entry, target_dir, download, /, **kwargs) -> object`` where ``download``
+is ``(FileEntry, /, *, dest=None, processor=None) -> str`` (pooch under the hood: hashing, caching,
+retries, and archive processors). ``anndata`` and ``spatialdata`` loaders ship built in.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Protocol, cast, overload
+
+if TYPE_CHECKING:
+    from ._registry import DatasetEntry, FileEntry
+
+    if TYPE_CHECKING:  # sphinx tries to import the above TYPE_CHECKING block
+        from anndata import AnnData
+        from pooch.typing import Processor
+        from spatialdata import SpatialData
+    else:
+        from typing import TypeAliasType
+
+        # TypeAliasType.__module__ is readonly: eval with a preset __name__ (globals as keyword would need 3.13+).
+        Processor = eval('A("Processor", object)', dict(__name__="pooch.typing", A=TypeAliasType))
+
+
+__all__ = ["register_loader", "available_loaders", "fetch", "Loader", "DownloadCB"]
+
+
+class Loader[T](Protocol):
+    """Callable that :func:`register_loader` accepts (as decorator target or direct argument)."""
+
+    def __call__(self, entry: DatasetEntry, target: Path, download: DownloadCB, /, **kwargs: object) -> T:
+        """Load ``entry``, calling `download` (see :class:`DownloadCB`) for the files it needs.
+
+        Args:
+            entry: Dataset to load; its files are what gets passed to `download`.
+            target: Directory for this dataset type (:func:`fetch` passes ``cache_dir / entry.type``).
+            download: Callback that downloads a single file of `entry` and returns its local path.
+            kwargs: Extra keyword arguments that :func:`fetch` forwards to the loader unchanged.
+        """
+
+
+class DownloadCB(Protocol):
+    """Callback passed as `download` to a :class:`Loader`."""
+
+    def __call__(self, file: FileEntry, /, *, dest: Path | None = None, processor: Processor | None = None) -> str:
+        """Download ``file`` if necessary and return the path reported by the fetch, as a string.
+
+        Args:
+            file: File to download.
+            dest: Optional target directory, defaults to :func:`fetch`’s `cache_dir / entry.type`.
+            processor: Optional pooch processor (e.g. ``pooch.Unzip``) handed to the underlying fetch.
+        """
+
+
+_LOADERS: dict[str, Loader[object]] = {}
+
+
+@overload
+def register_loader[T](type_name: str) -> Callable[[Loader[T]], Loader[T]]: ...
+@overload
+def register_loader[T](type_name: str, loader: Loader[T]) -> Loader[T]: ...
+def register_loader[T](type_name: str, loader: Loader[T] | None = None) -> Callable[[Loader[T]], Loader[T]] | Loader[T]:
+    """Bind ``type_name`` to a :class:`Loader`; returns the loader, or a decorator when none is given."""
+
+    def _bind(candidate: Loader[T]) -> Loader[T]:
+        _LOADERS[type_name] = candidate
+        return candidate
+
+    return _bind(loader) if loader is not None else _bind
+
+
+def available_loaders() -> list[str]:
+    """List every dataset ``type`` that currently has a loader, in sorted order."""
+    return sorted(_LOADERS.keys())
+
+
+def fetch[T](
+    entry: DatasetEntry, cache_dir: str | Path, *, base_url: str | None = None, retries: int = 3, **kwargs: object
+) -> T:  # type: ignore[type-var]
+    """Load ``entry`` with the loader registered for ``entry.type``; files are downloaded on demand via pooch.
+
+    The loader is called with ``cache_dir / entry.type`` as target, a ``download`` callback and ``kwargs``.
+    Raises :class:`KeyError` when no loader is registered for ``entry.type``.
+    """
+    if entry.type not in _LOADERS:
+        raise KeyError(f"No loader registered for type {entry.type!r}. Available: {available_loaders()}")
+    type_dir = Path(cache_dir) / entry.type
+
+    def download(file: FileEntry, /, dest: Path | None = None, processor: Processor | None = None) -> str:
+        import pooch
+
+        root = dest or type_dir
+        root.mkdir(parents=True, exist_ok=True)
+        digest = f"sha256:{file.sha256}" if file.sha256 else None
+        url = file.resolve_url(base_url)
+        manager = pooch.create(
+            path=str(root), base_url="", registry={file.name: digest}, urls={file.name: url}, retry_if_failed=retries
+        )
+        return manager.fetch(file.name, processor=processor, progressbar=True)
+
+    loader = cast("Loader[T]", _LOADERS[entry.type])
+    return loader(entry, type_dir, download, **kwargs)
+
+
+@register_loader("anndata")
+def _load_anndata(entry: DatasetEntry, target: Path, download: DownloadCB, /, **kwargs: object) -> AnnData:
+    """Built-in loader: fetch the entry's only ``.h5ad`` file, then open it with :func:`anndata.read_h5ad`."""
+    from anndata import read_h5ad
+
+    return read_h5ad(download(entry.file(suffix=".h5ad")), **cast("dict[str, Any]", kwargs))
+
+
+@register_loader("spatialdata")
+def _load_spatialdata(entry: DatasetEntry, target: Path, download: DownloadCB, /, **kwargs: object) -> SpatialData:
+    """Built-in loader: fetch a ``.zip``, let pooch unpack it, and open the one ``.zarr`` store it contains.
+
+    The archive is unpacked into ``target / entry.name``; the store is located by globbing ``*.zarr``
+    there, so its name is independent of the registry key and stores of other datasets under
+    ``target`` are not picked up. Needs the ``spatialdata`` extra.
+    """
+    import pooch
+    import spatialdata as sd
+
+    store_dir = target / entry.name
+    download(entry.file(suffix=".zip"), dest=store_dir, processor=pooch.Unzip(extract_dir="."))
+    stores = sorted(store_dir.glob("*.zarr"))
+    if len(stores) == 1:
+        return sd.read_zarr(stores[0], **cast("dict[str, Any]", kwargs))
+    raise RuntimeError(f"Expected exactly one .zarr extracted under {store_dir}, found {len(stores)}: {stores}.")
diff --git a/src/scverse_misc/datasets/_registry.py b/src/scverse_misc/datasets/_registry.py
new file mode 100644
index 0000000..f256f3e
--- /dev/null
+++ b/src/scverse_misc/datasets/_registry.py
@@ -0,0 +1,109 @@
+"""Typed dataset entries + a YAML parser. Plain data — no registry/fetcher machinery."""
+
+from __future__ import annotations
+
+import warnings
+from dataclasses import dataclass, field, fields
+from typing import TYPE_CHECKING, Any
+
+import yaml
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+    from os import PathLike
+
+__all__ = ["FileEntry", "DatasetEntry", "parse_registry"]
+
+
+@dataclass(frozen=True, slots=True)
+class FileEntry:
+    """One downloadable file of a dataset.
+
+    Parameters
+    ----------
+    name
+        Name the file gets on disk (e.g. ``"cells.zip"``).
+    url
+        Complete download URL (e.g. a Zenodo file URL). Wins over ``s3_key`` when both are set.
+    s3_key
+        Key appended to the registry's ``base_url``. Only consulted when ``url`` is unset.
+    sha256
+        Expected SHA-256 hash. When set, downloads are verified against it.
+    """
+
+    name: str
+    url: str | None = None
+    s3_key: str | None = None
+    sha256: str | None = None
+
+    def resolve_url(self, base_url: str | None = None) -> str:
+        """Return ``url`` when set, otherwise ``base_url/s3_key``; raise :class:`ValueError` if neither works."""
+        if self.url:
+            return self.url
+        if not (base_url and self.s3_key):
+            raise ValueError(f"FileEntry {self.name!r} has neither `url` nor `s3_key` (with a registry `base_url`).")
+        return f"{base_url.rstrip('/')}/{self.s3_key}"
+
+
+@dataclass(frozen=True, slots=True)
+class DatasetEntry:
+    """A named dataset consisting of one or more files.
+
+    ``metadata`` carries every key of the YAML row except ``type`` and ``files``
+    (e.g. ``shape``, ``library_id``, ``doc_header``); the core does not interpret it.
+    """
+
+    name: str
+    type: str
+    files: tuple[FileEntry, ...]
+    metadata: Mapping[str, Any] = field(default_factory=dict)
+
+    def file(self, *, name: str | None = None, suffix: str | None = None) -> FileEntry:
+        """Return the file whose name equals ``name`` or ends with ``suffix``; exactly one file must match."""
+        if name is not None:
+            crit = f"name={name!r}"
+            found = [entry for entry in self.files if entry.name == name]
+        elif suffix is not None:
+            crit = f"suffix={suffix!r}"
+            found = [entry for entry in self.files if entry.name.endswith(suffix)]
+        else:
+            raise ValueError("Pass exactly one of `name` or `suffix`.")
+        if len(found) == 1:
+            return found[0]
+        raise ValueError(f"Expected exactly one file with {crit} in {self.name!r}, found {len(found)}.")
+
+
+_FILE_FIELDS = frozenset(f.name for f in fields(FileEntry))
+
+
+def _file_entry(fd: Mapping[str, Any], dataset: str) -> FileEntry:
+    """Turn one ``files`` item of a registry row into a :class:`FileEntry`.
+
+    Keys that are not :class:`FileEntry` fields (e.g. ``description``) are dropped; a warning lists them.
+    """
+    extras = sorted(key for key in fd if key not in _FILE_FIELDS)
+    if extras:
+        warnings.warn(f"Ignoring unknown file keys {extras} in dataset {dataset!r}.", stacklevel=3)
+    return FileEntry(**{key: fd[key] for key in fd if key in _FILE_FIELDS})
+
+
+def parse_registry(path: PathLike[str] | str) -> tuple[str | None, dict[str, DatasetEntry]]:
+    """Parse a YAML registry (read as UTF-8) into ``(base_url, {name: DatasetEntry})``.
+
+    The YAML has a top-level ``base_url`` (or ``s3_base_url``) and a ``datasets`` mapping of
+    ``name -> {type, files: [{name, url?/s3_key?, sha256?}], ...}``; row keys other than ``type`` and
+    ``files`` land in ``metadata``. Empty/null ``datasets`` or ``files`` yield no entries / no files.
+    """
+    with open(path, encoding="utf-8") as f:  # explicit: the default encoding is locale-dependent
+        config = yaml.safe_load(f) or {}
+    base_url = config.get("base_url") or config.get("s3_base_url")
+    datasets = {
+        name: DatasetEntry(
+            name=name,
+            type=row["type"],  # required: a row without ``type`` raises KeyError
+            files=tuple(_file_entry(fd, name) for fd in row.get("files") or []),
+            metadata={k: v for k, v in row.items() if k not in ("type", "files")},
+        )
+        for name, row in (config.get("datasets") or {}).items()
+    }
+    return base_url, datasets
diff --git a/stubs/anndata.pyi b/stubs/anndata.pyi
new file mode 100644
index 0000000..e9e2013
--- /dev/null
+++ b/stubs/anndata.pyi
@@ -0,0 +1,6 @@
+import os
+from typing import Any
+
+class AnnData: ...
+
+def read_h5ad(path: str | os.PathLike[str], **kwargs: Any) -> AnnData: ...  # noqa: ANN401
diff --git a/stubs/pooch/__init__.pyi b/stubs/pooch/__init__.pyi
new file mode 100644
index 0000000..b729956
--- /dev/null
+++ b/stubs/pooch/__init__.pyi
@@ -0,0 +1,37 @@
+from .typing import Downloader, PathInputType, PathType, Processor
+
+def create(
+    path: PathInputType,
+    base_url: str,
+    version: str | None = None,
+    version_dev: str = "master",
+    env: str | None = None,
+    registry: dict[str, str | None] | None = None,
+    urls: dict[str, str] | None = None,
+    retry_if_failed: int = 0,
+    allow_updates: bool | str = True,
+) -> Pooch: ...
+
+class Pooch:
+    def __init__(
+        self,
+        path: PathType,
+        base_url: str,
+        registry: dict[str, str | None] | None = None,
+        urls: dict[str, str] | None = None,
+        retry_if_failed: int = 0,
+        allow_updates: bool = True,
+    ) -> None: ...
+    def fetch(
+        self,
+        fname: str,
+        processor: Processor | None = None,
+        downloader: Downloader | None = None,
+        progressbar: bool = False,
+    ) -> str: ...
+
+class Unzip:
+    def __init__(self, extract_dir: str | None = None) -> None: ...
+    def __call__(self, fname: str, action: str | None, pooch: Pooch | None) -> object: ...
+
+_u: Processor = Unzip()  # type assertion
diff --git a/stubs/pooch/typing.pyi b/stubs/pooch/typing.pyi
new file mode 100644
index 0000000..fdfac0b
--- /dev/null
+++ b/stubs/pooch/typing.pyi
@@ -0,0 +1,20 @@
+import os
+from collections.abc import Callable
+from typing import Literal, Protocol
+
+from . import Pooch
+
+type Action = Literal["download", "fetch", "update"]
+type PathType = str | os.PathLike[str]
+type PathInputType = PathType | list[PathType] | tuple[PathType, ...]
+type Processor = Callable[[str, Action, Pooch | None], object]
+
+class Downloader(Protocol):
+    def __call__(  # noqa: E704
+        self,
+        fname: str,
+        action: PathType | None,
+        pooch: Pooch | None,
+        *,
+        check_only: bool | None = None,
+    ) -> object: ...
diff --git a/stubs/spatialdata.pyi b/stubs/spatialdata.pyi
new file mode 100644
index 0000000..3d023c7
--- /dev/null
+++ b/stubs/spatialdata.pyi
@@ -0,0 +1,6 @@
+import os
+from typing import Any
+
+class SpatialData: ...
+
+def read_zarr(path: str | os.PathLike[str], **kwargs: Any) -> SpatialData: ...  # noqa: ANN401
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
new file mode 100644
index 0000000..6f5da2b
--- /dev/null
+++ b/tests/test_datasets.py
@@ -0,0 +1,188 @@
+from __future__ import annotations
+
+import sys
+import types
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+from scverse_misc.datasets import (
+    DatasetEntry,
+    FileEntry,
+    _fetcher,
+    available_loaders,
+    fetch,
+    parse_registry,
+    register_loader,
+)
+
+if TYPE_CHECKING:
+    from scverse_misc.datasets import DownloadCB
+
+
+_YAML = """\
+base_url: https://example.org/data/
+datasets:
+  toy:
+    type: dummy
+    shape: [10, 3]
+    files:
+      - name: toy.h5ad
+        s3_key: toy.h5ad
+        sha256: abc123
+  remote:
+    type: dummy
+    files:
+      - name: remote.zip
+        url: https://zenodo.org/records/1/files/remote.zip
+"""
+
+
+@pytest.fixture
+def registry(tmp_path: Path) -> dict[str, DatasetEntry]:
+    p = tmp_path / "datasets.yaml"
+    p.write_text(_YAML)
+    base_url, datasets = parse_registry(p)
+    assert base_url == "https://example.org/data/"
+    return datasets
+
+
+def test_parse_registry(registry: dict[str, DatasetEntry]) -> None:
+    assert registry.keys() == {"toy", "remote"}
+    entry = registry["toy"]
+    assert entry.type == "dummy"
+    assert entry.metadata["shape"] == [10, 3]  # keys other than type/files end up in metadata
+    assert entry.file(suffix=".h5ad").sha256 == "abc123"
+
+
+def test_parse_registry_warns_on_extra_file_keys(tmp_path: Path) -> None:
+    p = tmp_path / "datasets.yaml"
+    p.write_text(
+        "datasets:\n"
+        "  d:\n"
+        "    type: dummy\n"
+        "    files:\n"
+        "      - name: x.h5ad\n"
+        "        url: https://z/x.h5ad\n"
+        "        description: an unknown-to-FileEntry key\n"
+    )
+    # unknown keys are dropped (not fatal) but warned about so typos surface
+    with pytest.warns(UserWarning, match="unknown file keys.*description"):
+        _, datasets = parse_registry(p)
+    assert datasets["d"].file(name="x.h5ad").url == "https://z/x.h5ad"
+
+
+def test_resolve_url() -> None:
+    keyed = FileEntry(name="x", s3_key="k")
+    # an explicit url wins over s3_key, whatever the base_url
+    assert FileEntry(name="x.zip", url="https://z/x.zip", s3_key="x.zip").resolve_url("https://b/") == "https://z/x.zip"
+    # without url, s3_key is joined onto base_url
+    assert keyed.resolve_url("https://b") == "https://b/k"
+    with pytest.raises(ValueError, match="neither"):  # s3_key alone cannot be resolved
+        keyed.resolve_url(None)
+
+
+def test_file_selection_is_unambiguous(registry: dict[str, DatasetEntry]) -> None:
+    toy = registry["toy"]
+    assert toy.file(name="toy.h5ad").s3_key == "toy.h5ad"  # exact name match
+    for miss in ({"name": "nope.h5ad"}, {"suffix": ".missing"}):
+        with pytest.raises(ValueError, match="exactly one"):
+            toy.file(**miss)
+    with pytest.raises(ValueError, match="exactly one of"):
+        toy.file()
+
+
+def test_builtin_loaders_are_shipped() -> None:
+    assert set(available_loaders()).issuperset({"anndata", "spatialdata"})
+
+
+def test_register_and_dispatch(registry: dict[str, DatasetEntry], tmp_path: Path) -> None:
+    seen: dict[str, object] = {}
+
+    @register_loader("dummy")
+    def _load(entry: DatasetEntry, target: Path, download: DownloadCB, /, **kw: object) -> str:
+        seen.update(kw)
+        return entry.name
+
+    try:
+        # dummy loader does no download, so no network / pooch needed
+        assert fetch(registry["toy"], tmp_path, base_url="https://b", foo=1) == "toy"
+        assert seen == {"foo": 1}
+    finally:
+        _fetcher._LOADERS.pop("dummy", None)
+
+
+def test_unknown_loader(registry: dict[str, DatasetEntry], tmp_path: Path) -> None:
+    toy = registry["toy"]  # its type is "dummy", for which this test registers no loader
+    with pytest.raises(KeyError, match="No loader registered"):
+        fetch(toy, tmp_path)
+
+
+def test_download_drives_pooch(
+    registry: dict[str, DatasetEntry], tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """The `download` closure wires FileEntry -> pooch.create/fetch without touching the network."""
+    calls: dict[str, object] = {}
+
+    class FakePup:
+        def fetch(self, name: str, *, processor: object, progressbar: bool) -> str:
+            calls["fetched"] = name
+            return f"/cache/{name}"
+
+    def fake_create(**kw: object) -> FakePup:
+        calls.update(kw)
+        return FakePup()
+
+    import pooch
+
+    monkeypatch.setattr(pooch, "create", fake_create)
+
+    @register_loader("dummy")
+    def _load(entry: DatasetEntry, target: Path, download: DownloadCB, /, **kw: object) -> str:
+        return download(entry.file(suffix=".h5ad"))
+
+    try:
+        assert fetch(registry["toy"], tmp_path, base_url="https://b") == "/cache/toy.h5ad"
+    finally:
+        _fetcher._LOADERS.pop("dummy", None)
+
+    assert calls["urls"] == {"toy.h5ad": "https://b/toy.h5ad"}
+    assert calls["registry"] == {"toy.h5ad": "sha256:abc123"}
+    assert calls["fetched"] == "toy.h5ad"
+
+
+# old anndata versions use the old arguments
+@pytest.mark.filterwarnings(
+    r"ignore:The (decorator_name|docstring_style|exported_object_name)( class)? argument is deprecated:DeprecationWarning"
+)
+def test_load_anndata_reads_h5ad(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    import anndata
+
+    monkeypatch.setattr(anndata, "read_h5ad", lambda path, **kw: ("adata", path, kw))
+    entry = DatasetEntry(name="toy", type="anndata", files=(FileEntry(name="toy.h5ad", url="https://z/toy.h5ad"),))
+    result: object = _fetcher._load_anndata(entry, tmp_path, lambda f, **kw: "/cache/toy.h5ad", backed="r")
+    assert result == ("adata", "/cache/toy.h5ad", {"backed": "r"})
+
+
+def test_load_spatialdata_reads_zarr(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    fake_sd = types.ModuleType("spatialdata")
+    fake_sd.read_zarr = lambda path, **kw: ("sdata", path)  # type: ignore[attr-defined]
+    monkeypatch.setitem(sys.modules, "spatialdata", fake_sd)
+    entry = DatasetEntry(
+        name="cells", type="spatialdata", files=(FileEntry(name="cells.zip", url="https://z/cells.zip"),)
+    )
+
+    # download extracted nothing -> loud failure (0 zarrs found)
+    with pytest.raises(RuntimeError, match="Expected exactly one"):
+        _fetcher._load_spatialdata(entry, tmp_path, lambda f, **kw: str(kw["dest"]))
+
+    # the extracted .zarr need not be named after the registry key; glob finds the single one
+    def extract(file: FileEntry, **kw: object) -> str:
+        dest = kw["dest"]
+        assert isinstance(dest, Path)
+        (dest / "whatever.zarr").mkdir(parents=True)
+        return str(dest)
+
+    result: object = _fetcher._load_spatialdata(entry, tmp_path, extract)
+    assert result == ("sdata", tmp_path / "cells" / "whatever.zarr")