diff --git a/examples/databricks_pyspark.py b/examples/databricks_pyspark.py new file mode 100644 index 0000000..fa8967f --- /dev/null +++ b/examples/databricks_pyspark.py @@ -0,0 +1,67 @@ +""" +Databricks example: Load Open-Meteo .om files as a Spark DataFrame. + +This script demonstrates how to use the ``omfiles`` PySpark custom data source +to read spatial weather data from Open-Meteo's public S3 bucket and persist it +as a reusable Delta table. + +Prerequisites (install on the Databricks cluster): + %pip install omfiles[pyspark,grids] + +Requirements: + - Databricks Runtime 15.2 or later + - Python 3.10+ +""" + +# -- 0. Install (run once per cluster session) -------------------------------- +# %pip install omfiles[pyspark,grids] + +import datetime as dt + +# -- 1. Register the custom data source -------------------------------------- +from omfiles.pyspark import OmFileDataSource + +spark.dataSource.register(OmFileDataSource) # type: ignore[name-defined] # noqa: F821 + +# -- 2. Build the S3 URI for a recent spatial file ---------------------------- +MODEL_DOMAIN = "dwd_icon" +date_time = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=2) +S3_URI = ( + f"s3://openmeteo/data_spatial/{MODEL_DOMAIN}/{date_time.year}/" + f"{date_time.month:02}/{date_time.day:02}/0000Z/" + f"{date_time.strftime('%Y-%m-%d')}T0000.om" +) +print(f"Reading: {S3_URI}") + +# -- 3. Read selected weather variables into a Spark DataFrame ---------------- +df = ( + spark.read.format("om") # type: ignore[name-defined] # noqa: F821 + .option("path", S3_URI) + .option("variables", "temperature_2m,wind_speed_10m") + .option("s3_anon", "true") + .option("s3_block_size", "65536") + .option("include_coordinates", "true") + .load() +) + +# Show a sample of the data +df.show(20) +df.printSchema() +print(f"Total rows: {df.count()}") + +# -- 4. (Optional) Save as a Delta table for fast reuse ---------------------- +# df.write.format("delta").mode("overwrite").saveAsTable("weather.dwd_icon_spatial") + +# -- 5. Run queries directly ------------------------------------------------- +# Example: find the hottest locations +df.createOrReplaceTempView("weather") +hottest = spark.sql( # type: ignore[name-defined] # noqa: F821 + """ + SELECT latitude, longitude, temperature_2m + FROM weather + WHERE temperature_2m IS NOT NULL + ORDER BY temperature_2m DESC + LIMIT 10 + """ +) +hottest.show() diff --git a/pyproject.toml b/pyproject.toml index 7229ae4..d8d0675 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ codec = [ xarray = ["xarray>=2023.1.0"] fsspec = ["fsspec>=2023.1.0", "s3fs>=2023.1.0"] grids = ["pyproj>=3.1.0"] +pyspark = ["pyspark>=3.5.0", "fsspec>=2023.1.0", "s3fs>=2023.1.0"] dask = ["dask[array]>=2023.1.0"] all = [ "zarr>=2.18.2", @@ -46,6 +47,7 @@ all = [ "s3fs>=2023.1.0", "pyproj>=3.3.0", "dask[array]>=2023.1.0", + "pyspark>=3.5.0" ] [dependency-groups] diff --git a/python/omfiles/pyspark.py b/python/omfiles/pyspark.py new file mode 100644 index 0000000..2da3fca --- /dev/null +++ b/python/omfiles/pyspark.py @@ -0,0 +1,360 @@ +""" +PySpark custom data source for reading Open-Meteo .om files. + +This module provides a PySpark DataSource implementation that allows reading .om files +directly into Spark DataFrames, enabling efficient distributed processing in Databricks +and other Spark environments. + +Requires PySpark (Databricks Runtime 15.2+) and ``omfiles[fsspec,grids]``. 
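+
+Local paths are opened directly, while ``s3://`` URIs go through fsspec; a
+minimal local-file sketch (the path below is hypothetical)::
+
+    df = spark.read.format("om").option("path", "/dbfs/tmp/example.om").load()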
+
+Example usage::
+
+    from omfiles.pyspark import OmFileDataSource
+
+    spark.dataSource.register(OmFileDataSource)
+
+    # Read a spatial .om file from S3
+    df = (
+        spark.read.format("om")
+        .option("path", "s3://openmeteo/data_spatial/dwd_icon/2026/03/01/0000Z/2026-03-01T0000.om")
+        .option("variables", "temperature_2m,wind_speed_10m")
+        .option("s3_anon", "true")
+        .load()
+    )
+    df.show()
+
+    # Save as Delta table for fast reuse
+    df.write.format("delta").saveAsTable("weather.temperature")
+"""
+
+from __future__ import annotations
+
+from typing import Iterator, Sequence, Tuple
+
+from pyspark.sql.datasource import DataSource, DataSourceReader, InputPartition
+from pyspark.sql.types import (
+    DoubleType,
+    FloatType,
+    IntegerType,
+    LongType,
+    StructField,
+    StructType,
+)
+
+
+def _numpy_dtype_to_spark(np_dtype) -> DoubleType | FloatType | IntegerType | LongType:
+    """Map a numpy dtype to the corresponding PySpark type."""
+    import numpy as np
+
+    name = np.dtype(np_dtype).name
+    mapping = {
+        "float32": FloatType(),
+        "float64": DoubleType(),
+        "int8": IntegerType(),
+        "int16": IntegerType(),
+        "int32": IntegerType(),
+        "int64": LongType(),
+        "uint8": IntegerType(),
+        "uint16": IntegerType(),
+        "uint32": IntegerType(),
+        "uint64": LongType(),
+    }
+    return mapping.get(name, DoubleType())
+
+
+class OmVariablePartition(InputPartition):
+    """One partition per variable in the .om file."""
+
+    def __init__(self, variable_name: str):
+        self.variable_name = variable_name
+
+
+class OmFileDataSource(DataSource):
+    """
+    PySpark DataSource for reading Open-Meteo ``.om`` files.
+
+    Options:
+        path (str): Path to the ``.om`` file. Can be a local path or an S3 URI
+            (e.g. ``s3://openmeteo/data_spatial/…``).
+        variables (str, optional): Comma-separated list of variable names to read.
+            When omitted, all array children of the root group are read.
+        s3_anon (str, optional): Set to ``"true"`` for anonymous S3 access (default ``"true"``).
+        s3_block_size (str, optional): S3 read block size in bytes (default ``"65536"``).
+        cache_storage (str, optional): Local directory for fsspec block-cache (default ``""`` = no caching).
+        include_coordinates (str, optional): ``"true"`` (default) to add ``latitude`` /
+            ``longitude`` columns derived from the grid's CRS.
+        row_chunk_size (str, optional): Reserved for future row-based partitioning;
+            currently ignored (one partition is created per variable).
+    """
+
+    @classmethod
+    def name(cls) -> str:
+        return "om"
+
+    def schema(self) -> StructType:
+        """
+        Infer the Spark schema by inspecting the .om file metadata.
+
+        For spatial files (root is a group) the schema is:
+        - ``latitude`` DOUBLE
+        - ``longitude`` DOUBLE
+        - one column per selected variable (FLOAT / DOUBLE / INT / …)
+
+        For flat array files (root is an array) the schema is:
+        - one ``dim<i>`` index column (LONG) per leading dimension
+        - one column ``value`` with the array's dtype
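+
+        For example, a spatial file holding a float32 ``temperature_2m``
+        variable, read with the default options, yields roughly::
+
+            root
+             |-- latitude: double (nullable = false)
+             |-- longitude: double (nullable = false)
+             |-- temperature_2m: float (nullable = true)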
+        """
+        reader = self._open_reader()
+        try:
+            if reader.is_group:
+                fields: list[StructField] = []
+                include_coords = self.options.get("include_coordinates", "true").lower() == "true"
+                if include_coords:
+                    fields.append(StructField("latitude", DoubleType(), nullable=False))
+                    fields.append(StructField("longitude", DoubleType(), nullable=False))
+
+                variable_names = self._resolve_variable_names(reader)
+                for var_name in variable_names:
+                    child = reader.get_child_by_name(var_name)
+                    spark_type = _numpy_dtype_to_spark(child.dtype)
+                    fields.append(StructField(var_name, spark_type, nullable=True))
+                return StructType(fields)
+            elif reader.is_array:
+                spark_type = _numpy_dtype_to_spark(reader.dtype)
+                dims = len(reader.shape)
+                fields = []
+                for i in range(dims - 1):
+                    fields.append(StructField(f"dim{i}", LongType(), nullable=False))
+                fields.append(StructField("value", spark_type, nullable=True))
+                return StructType(fields)
+            else:
+                raise ValueError("Root of .om file is a scalar — cannot be read as a table.")
+        finally:
+            reader.close()
+
+    def reader(self, schema: StructType) -> DataSourceReader:
+        return OmFileDataSourceReader(schema, self.options)
+
+    # ------------------------------------------------------------------
+    # Internal helpers (only used on the *driver* to infer schema)
+    # ------------------------------------------------------------------
+
+    def _open_reader(self):
+        """Open an OmFileReader from the configured path."""
+        from omfiles import OmFileReader
+
+        path: str = self.options.get("path", "")
+        if not path:
+            raise ValueError("The 'path' option is required.")
+
+        if path.startswith("s3://"):
+            return self._open_s3_reader(path)
+        else:
+            return OmFileReader(path)
+
+    def _open_s3_reader(self, path: str):
+        """Open an OmFileReader backed by fsspec / S3."""
+        import fsspec
+
+        from omfiles import OmFileReader
+
+        s3_anon = self.options.get("s3_anon", "true").lower() == "true"
+        block_size = int(self.options.get("s3_block_size", "65536"))
+        cache_storage = self.options.get("cache_storage", "")
+
+        if cache_storage:
+            uri = f"blockcache::{path}"
+            backend = fsspec.open(
+                uri,
+                mode="rb",
+                s3={"anon": s3_anon, "default_block_size": block_size},
+                blockcache={"cache_storage": cache_storage},
+            )
+        else:
+            backend = fsspec.open(
+                path,
+                mode="rb",
+                s3={"anon": s3_anon, "default_block_size": block_size},
+            )
+        return OmFileReader(backend)
+
+    def _resolve_variable_names(self, reader) -> list[str]:
+        """Return the list of variable names to read."""
+        variables_opt = self.options.get("variables", "")
+        if variables_opt:
+            return [v.strip() for v in variables_opt.split(",") if v.strip()]
+        # Auto-discover all array children
+        names: list[str] = []
+        for i in range(reader.num_children):
+            child = reader.get_child_by_index(i)
+            if child.is_array:
+                names.append(child.name)
+        return names
+
+
+class OmFileDataSourceReader(DataSourceReader):
+    """
+    Reads .om file data and yields rows to Spark.
+
+    Each variable is read as a separate partition to enable parallelism across
+    the Spark cluster. Within each partition the full (lat × lon) grid is read
+    for that single variable and yielded row-by-row.
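+
+    A rough sketch of the contract (Spark drives these calls through the
+    Python data source API; shown only for illustration)::
+
+        reader = OmFileDataSourceReader(schema, options)
+        for part in reader.partitions():    # one OmVariablePartition per variable
+            for row in reader.read(part):   # plain tuples matching the schema
+                ...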
+    """
+
+    def __init__(self, schema: StructType, options: dict):
+        self.schema = schema
+        self.options = options
+
+    def partitions(self) -> Sequence[InputPartition]:
+        """Create one partition per variable for spatial files, or one partition for flat arrays."""
+        reader = self._open_reader()
+        try:
+            if reader.is_group:
+                variable_names = self._resolve_variable_names(reader)
+                return [OmVariablePartition(name) for name in variable_names]
+            else:
+                # Flat array — single partition
+                return [OmVariablePartition("__array__")]
+        finally:
+            reader.close()
+
+    def read(self, partition: InputPartition) -> Iterator[Tuple]:
+        """Read data for a single variable partition and yield rows."""
+        reader = self._open_reader()
+        try:
+            if isinstance(partition, OmVariablePartition) and partition.variable_name == "__array__":
+                yield from self._read_flat_array(reader)
+            elif isinstance(partition, OmVariablePartition):
+                yield from self._read_spatial_variable(reader, partition.variable_name)
+            else:
+                raise ValueError(f"Unexpected partition type: {type(partition)}")
+        finally:
+            reader.close()
+
+    # ------------------------------------------------------------------
+    # Flat array reading
+    # ------------------------------------------------------------------
+
+    def _read_flat_array(self, reader) -> Iterator[Tuple]:
+        """Yield one row per element in a flat array (with leading dimension indices)."""
+        import numpy as np
+
+        data = reader[...]
+        it = np.nditer(data, flags=["multi_index"])
+        while not it.finished:
+            idx = it.multi_index
+            val = float(it[0]) if np.isfinite(it[0]) else None
+            yield (*idx[:-1], val) if len(idx) > 1 else (val,)
+            it.iternext()
+
+    # ------------------------------------------------------------------
+    # Spatial (hierarchical) variable reading
+    # ------------------------------------------------------------------
+
+    def _read_spatial_variable(self, reader, variable_name: str) -> Iterator[Tuple]:
+        """Read a single spatial variable and yield (lat, lon, value) rows."""
+        import numpy as np
+
+        include_coords = self.options.get("include_coordinates", "true").lower() == "true"
+        child = reader.get_child_by_name(variable_name)
+        data = child[...]  # shape: (ny, nx) or higher-dimensional
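+        # The whole variable is materialised in executor memory here before
+        # any rows are yielded.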
+
+        if include_coords:
+            try:
+                from omfiles.grids import OmGrid
+
+                # Try to read crs_wkt from file attributes
+                crs_wkt = self._get_crs_wkt(reader)
+                grid = OmGrid(crs_wkt, data.shape[:2])
+                lon2d, lat2d = grid.get_meshgrid()
+            except Exception:
+                # Fallback: use integer indices as coordinates
+                ny, nx = data.shape[:2]
+                lat2d = np.arange(ny, dtype=np.float64).reshape(ny, 1) * np.ones((1, nx))
+                lon2d = np.arange(nx, dtype=np.float64).reshape(1, nx) * np.ones((ny, 1))
+
+        ny, nx = data.shape[:2]
+        for y in range(ny):
+            for x in range(nx):
+                val = data[y, x]
+                # Convert numpy scalar to Python; NaN / Inf → None
+                if hasattr(val, "item"):
+                    val = val.item()
+                if isinstance(val, float) and (np.isnan(val) or np.isinf(val)):
+                    val = None
+                if include_coords:
+                    yield (float(lat2d[y, x]), float(lon2d[y, x]), val)
+                else:
+                    yield (val,)
+
+    def _get_crs_wkt(self, reader) -> str:
+        """Try to retrieve crs_wkt from the .om file attributes."""
+        try:
+            crs_reader = reader.get_child_by_name("crs_wkt")
+            if crs_reader.is_scalar:
+                return crs_reader.read_scalar()
+        except Exception:
+            pass
+        raise ValueError("Could not find crs_wkt in file attributes")
+
+    # ------------------------------------------------------------------
+    # Helpers (duplicated from DataSource — these must be serializable
+    # and run on executors, not on the driver)
+    # ------------------------------------------------------------------
+
+    def _open_reader(self):
+        """Open an OmFileReader from the configured path."""
+        from omfiles import OmFileReader
+
+        path: str = self.options.get("path", "")
+        if not path:
+            raise ValueError("The 'path' option is required.")
+
+        if path.startswith("s3://"):
+            return self._open_s3_reader(path)
+        else:
+            return OmFileReader(path)
+
+    def _open_s3_reader(self, path: str):
+        """Open an OmFileReader backed by fsspec / S3."""
+        import fsspec
+
+        from omfiles import OmFileReader
+
+        s3_anon = self.options.get("s3_anon", "true").lower() == "true"
+        block_size = int(self.options.get("s3_block_size", "65536"))
+        cache_storage = self.options.get("cache_storage", "")
+
+        if cache_storage:
+            uri = f"blockcache::{path}"
+            backend = fsspec.open(
+                uri,
+                mode="rb",
+                s3={"anon": s3_anon, "default_block_size": block_size},
+                blockcache={"cache_storage": cache_storage},
+            )
+        else:
+            backend = fsspec.open(
+                path,
+                mode="rb",
+                s3={"anon": s3_anon, "default_block_size": block_size},
+            )
+        return OmFileReader(backend)
+
+    def _resolve_variable_names(self, reader) -> list[str]:
+        """Return the list of variable names to read."""
+        variables_opt = self.options.get("variables", "")
+        if variables_opt:
+            return [v.strip() for v in variables_opt.split(",") if v.strip()]
+        names: list[str] = []
+        for i in range(reader.num_children):
+            child = reader.get_child_by_index(i)
+            if child.is_array:
+                names.append(child.name)
+        return names
diff --git a/tests/test_pyspark.py b/tests/test_pyspark.py
new file mode 100644
index 0000000..4b89a44
--- /dev/null
+++ b/tests/test_pyspark.py
@@ -0,0 +1,226 @@
+"""Tests for the PySpark custom data source integration.
+
+These tests exercise the core logic of the OmFileDataSource without requiring
+a running Spark cluster: PySpark must be importable (otherwise the module is
+skipped), but the classes are instantiated directly via ``__new__`` so the
+read-path logic runs locally without a Spark session.
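+
+Bypassing ``DataSource.__init__`` (which normally receives its options from
+Spark) looks like this throughout the tests::
+
+    ds = OmFileDataSource.__new__(OmFileDataSource)
+    ds.options = {"path": "/tmp/example.om"}  # hypothetical path
+    schema = ds.schema()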
+"""
+
+import numpy as np
+import pytest
+from omfiles import OmFileWriter
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _create_flat_om(path: str, data: np.ndarray) -> None:
+    """Write *data* as a flat (non-hierarchical) .om file."""
+    writer = OmFileWriter(path)
+    var = writer.write_array(data, chunks=[min(5, s) for s in data.shape], scale_factor=1.0, add_offset=0.0)
+    writer.close(var)
+
+
+def _create_hierarchical_om(path: str) -> dict[str, np.ndarray]:
+    """Write a hierarchical .om file with two variables under a root group."""
+    temp = np.arange(25, dtype=np.float32).reshape(5, 5)
+    wind = np.arange(25, dtype=np.float32).reshape(5, 5) * 0.5
+
+    writer = OmFileWriter(path)
+    v_temp = writer.write_array(temp, chunks=[5, 5], scale_factor=1.0, add_offset=0.0, name="temperature_2m")
+    v_wind = writer.write_array(wind, chunks=[5, 5], scale_factor=1.0, add_offset=0.0, name="wind_speed_10m")
+    root = writer.write_group("root", children=[v_temp, v_wind])
+    writer.close(root)
+    return {"temperature_2m": temp, "wind_speed_10m": wind}
+
+
+# ---------------------------------------------------------------------------
+# Skip if pyspark is not installed
+# ---------------------------------------------------------------------------
+
+pyspark = pytest.importorskip("pyspark", reason="PySpark not installed — skipping PySpark datasource tests")
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestOmFileDataSource:
+    """Unit tests for OmFileDataSource schema inference."""
+
+    def test_schema_inference_flat_array(self, tmp_path):
+        """Schema for a flat array should have dim columns + value."""
+        from omfiles.pyspark import OmFileDataSource
+
+        path = str(tmp_path / "flat.om")
+        data = np.arange(20, dtype=np.float32).reshape(4, 5)
+        _create_flat_om(path, data)
+
+        ds = OmFileDataSource.__new__(OmFileDataSource)
+        ds.options = {"path": path}
+
+        schema = ds.schema()
+        field_names = [f.name for f in schema.fields]
+        assert "dim0" in field_names
+        assert "value" in field_names
+
+    def test_schema_inference_hierarchical(self, tmp_path):
+        """Schema for a hierarchical file should contain the variable columns."""
+        from omfiles.pyspark import OmFileDataSource
+
+        path = str(tmp_path / "hier.om")
+        _create_hierarchical_om(path)
+
+        ds = OmFileDataSource.__new__(OmFileDataSource)
+        ds.options = {"path": path, "include_coordinates": "false"}
+
+        schema = ds.schema()
+        field_names = [f.name for f in schema.fields]
+        assert "temperature_2m" in field_names
+        assert "wind_speed_10m" in field_names
+
+    def test_schema_inference_hierarchical_with_coords(self, tmp_path):
+        """With include_coordinates=true (default), lat/lon columns should appear."""
+        from omfiles.pyspark import OmFileDataSource
+
+        path = str(tmp_path / "hier_coords.om")
+        _create_hierarchical_om(path)
+
+        ds = OmFileDataSource.__new__(OmFileDataSource)
+        ds.options = {"path": path}
+
+        schema = ds.schema()
+        field_names = [f.name for f in schema.fields]
+        assert "latitude" in field_names
+        assert "longitude" in field_names
+        assert "temperature_2m" in field_names
+
+    def test_schema_inference_selected_variables(self, tmp_path):
+        """Only selected variables should appear in the schema."""
+        from omfiles.pyspark import 
OmFileDataSource + + path = str(tmp_path / "selected.om") + _create_hierarchical_om(path) + + ds = OmFileDataSource.__new__(OmFileDataSource) + ds.options = {"path": path, "variables": "temperature_2m", "include_coordinates": "false"} + + schema = ds.schema() + field_names = [f.name for f in schema.fields] + assert "temperature_2m" in field_names + assert "wind_speed_10m" not in field_names + + +class TestOmFileDataSourceReader: + """Tests for the DataSourceReader read path.""" + + def test_read_flat_array(self, tmp_path): + """Reading a flat array should yield one row per element.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "flat.om") + data = np.arange(20, dtype=np.float32).reshape(4, 5) + _create_flat_om(path, data) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + reader.options = {"path": path} + reader.schema = None # not used in the read path directly + + partition = OmVariablePartition("__array__") + rows = list(reader.read(partition)) + assert len(rows) == 20 + # First row should be (dim0_index, value) + assert rows[0] == (0, 0.0) + # Last row + assert rows[-1] == (3, 19.0) + + def test_read_spatial_variable_no_coords(self, tmp_path): + """Reading a spatial variable without coordinates yields (value,) tuples.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "hier.om") + expected = _create_hierarchical_om(path) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + reader.options = {"path": path, "include_coordinates": "false"} + reader.schema = None + + partition = OmVariablePartition("temperature_2m") + rows = list(reader.read(partition)) + assert len(rows) == 25 # 5x5 grid + # Values should match the written data (row-major) + values = [r[0] for r in rows] + np.testing.assert_array_almost_equal(values, expected["temperature_2m"].flatten(), decimal=1) + + def test_read_spatial_variable_with_coords(self, tmp_path): + """Reading with coordinates should yield (lat, lon, value) tuples.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "hier_coords.om") + _create_hierarchical_om(path) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + reader.options = {"path": path, "include_coordinates": "true"} + reader.schema = None + + partition = OmVariablePartition("temperature_2m") + rows = list(reader.read(partition)) + assert len(rows) == 25 + # Each row should be a 3-tuple (lat, lon, value) + assert len(rows[0]) == 3 + + def test_partitions_hierarchical(self, tmp_path): + """Partitions should be one per variable for hierarchical files.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "parts.om") + _create_hierarchical_om(path) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + reader.options = {"path": path, "variables": "temperature_2m,wind_speed_10m"} + reader.schema = None + + partitions = reader.partitions() + assert len(partitions) == 2 + names = {p.variable_name for p in partitions} + assert names == {"temperature_2m", "wind_speed_10m"} + + def test_partitions_flat(self, tmp_path): + """Flat array files should have a single __array__ partition.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "flat.om") + _create_flat_om(path, np.arange(10, dtype=np.float32).reshape(2, 5)) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + 
reader.options = {"path": path} + reader.schema = None + + partitions = reader.partitions() + assert len(partitions) == 1 + assert partitions[0].variable_name == "__array__" + + def test_nan_becomes_none(self, tmp_path): + """NaN values in float arrays should become None in yielded rows.""" + from omfiles.pyspark import OmFileDataSourceReader, OmVariablePartition + + path = str(tmp_path / "nan.om") + data = np.array([[1.0, float("nan")], [3.0, 4.0]], dtype=np.float32) + writer = OmFileWriter(path) + v = writer.write_array(data, chunks=[2, 2], scale_factor=1.0, add_offset=0.0, name="temp") + root = writer.write_group("root", children=[v]) + writer.close(root) + + reader = OmFileDataSourceReader.__new__(OmFileDataSourceReader) + reader.options = {"path": path, "variables": "temp", "include_coordinates": "false"} + reader.schema = None + + partition = OmVariablePartition("temp") + rows = list(reader.read(partition)) + values = [r[0] for r in rows] + assert values[0] == pytest.approx(1.0, abs=0.01) + assert values[1] is None # NaN → None diff --git a/uv.lock b/uv.lock index 43f38ab..645dc57 100644 --- a/uv.lock +++ b/uv.lock @@ -16,7 +16,7 @@ resolution-markers = [ [[package]] name = "aiobotocore" -version = "3.4.0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -28,9 +28,9 @@ dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b8/50/a48ed11b15f926ce3dbb33e7fb0f25af17dbb99bcb7ae3b30c763723eca7/aiobotocore-3.4.0.tar.gz", hash = "sha256:a918b5cb903f81feba7e26835aed4b5e6bb2d0149d7f42bb2dd7d8089e3d9000", size = 122360, upload-time = "2026-04-07T06:12:24.884Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/89/9533b377e9412013cc43a539d81bc5f8feeb4b6830643821ad612f78b09b/aiobotocore-3.5.0.tar.gz", hash = "sha256:d45d1c4659ad0e48b694a5aa4ff18829100386f7de96c8d146ec7757a6f12918", size = 123061, upload-time = "2026-04-21T07:25:26.993Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/d8/ce9386e6d76ea79e61dee15e62aa48cff6be69e89246b0ac4a11857cb02c/aiobotocore-3.4.0-py3-none-any.whl", hash = "sha256:26290eb6830ea92d8a6f5f90b56e9f5cedd6d126074d5db63b195e281d982465", size = 88018, upload-time = "2026-04-07T06:12:22.684Z" }, + { url = "https://files.pythonhosted.org/packages/2d/05/6eeeadef45c24630af0ceae4d038b883e9a394786300529286ba8cc1e62d/aiobotocore-3.5.0-py3-none-any.whl", hash = "sha256:49ce35bb8b96b85d3251c2cbbb2ed7a028dc0cb0d0d0801f9ccca1ccd0d41ded", size = 88281, upload-time = "2026-04-21T07:25:25.258Z" }, ] [[package]] @@ -237,16 +237,16 @@ wheels = [ [[package]] name = "botocore" -version = "1.42.84" +version = "1.42.91" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/b7/1c03423843fb0d1795b686511c00ee63fed1234c2400f469aeedfd42212f/botocore-1.42.84.tar.gz", hash = "sha256:234064604c80d9272a5e9f6b3566d260bcaa053a5e05246db90d7eca1c2cf44b", size = 15148615, upload-time = "2026-04-06T19:38:56.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/21/bc/a4b7c46471c2e789ad8c4c7acfd7f302fdb481d93ff870f441249b924ae6/botocore-1.42.91.tar.gz", hash = "sha256:d252e27bc454afdbf5ed3dc617aa423f2c855c081e98b7963093399483ecc698", size = 15213010, upload-time = "2026-04-17T19:30:50.793Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e3/37/0c0c90361c8a1b9e6c75222ca24ae12996a298c0e18822a72ab229c37207/botocore-1.42.84-py3-none-any.whl", hash = "sha256:15f3fe07dfa6545e46a60c4b049fe2bdf63803c595ae4a4eec90e8f8172764f3", size = 14827061, upload-time = "2026-04-06T19:38:53.613Z" }, + { url = "https://files.pythonhosted.org/packages/b1/fc/24cc0a47c824f13933e210e9ad034b4fba22f7185b8d904c0fbf5a3b2be8/botocore-1.42.91-py3-none-any.whl", hash = "sha256:7a28c3cc6bfab5724ad18899d52402b776a0de7d87fa20c3c5270bcaaf199ce8", size = 14897344, upload-time = "2026-04-17T19:30:44.245Z" }, ] [[package]] @@ -627,11 +627,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/22/12/2948fbe5513d062169bd91f7d7b1cd97bc8894f32946b71fa39f6e63ca0c/idna-3.12.tar.gz", hash = "sha256:724e9952cc9e2bd7550ea784adb098d837ab5267ef67a1ab9cf7846bdbdd8254", size = 194350, upload-time = "2026-04-21T13:32:48.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/53/b2/acc33950394b3becb2b664741a0c0889c7ef9f9ffbfa8d47eddb53a50abd/idna-3.12-py3-none-any.whl", hash = "sha256:60ffaa1858fac94c9c124728c24fcde8160f3fb4a7f79aa8cdd33a9d1af60a67", size = 68634, upload-time = "2026-04-21T13:32:47.403Z" }, ] [[package]] @@ -1236,6 +1236,7 @@ all = [ { name = "numcodecs", version = "0.16.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pyproj", version = "3.7.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "pyproj", version = "3.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyspark" }, { name = "s3fs" }, { name = "xarray", version = "2025.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "xarray", version = "2026.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1259,6 +1260,11 @@ grids = [ { name = "pyproj", version = "3.7.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "pyproj", version = "3.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] +pyspark = [ + { name = "fsspec" }, + { name = "pyspark" }, + { name = "s3fs" }, +] xarray = [ { name = "xarray", version = "2025.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "xarray", version = "2026.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1301,19 +1307,23 @@ requires-dist = [ { name = "dask", extras = ["array"], marker = "extra == 'dask'", specifier = ">=2023.1.0" }, { name = "fsspec", marker = "extra == 'all'", specifier = ">=2023.10.0" }, { name = "fsspec", 
marker = "extra == 'fsspec'", specifier = ">=2023.1.0" }, + { name = "fsspec", marker = "extra == 'pyspark'", specifier = ">=2023.1.0" }, { name = "numcodecs", marker = "extra == 'all'", specifier = ">=0.12.1" }, { name = "numcodecs", marker = "extra == 'codec'", specifier = ">=0.12.1" }, { name = "numpy", specifier = ">=1.21.0" }, { name = "pyproj", marker = "extra == 'all'", specifier = ">=3.3.0" }, { name = "pyproj", marker = "extra == 'grids'", specifier = ">=3.1.0" }, + { name = "pyspark", marker = "extra == 'all'", specifier = ">=3.5.0" }, + { name = "pyspark", marker = "extra == 'pyspark'", specifier = ">=3.5.0" }, { name = "s3fs", marker = "extra == 'all'", specifier = ">=2023.1.0" }, { name = "s3fs", marker = "extra == 'fsspec'", specifier = ">=2023.1.0" }, + { name = "s3fs", marker = "extra == 'pyspark'", specifier = ">=2023.1.0" }, { name = "xarray", marker = "extra == 'all'", specifier = ">=2023.1.0" }, { name = "xarray", marker = "extra == 'xarray'", specifier = ">=2023.1.0" }, { name = "zarr", marker = "extra == 'all'", specifier = ">=2.18.2" }, { name = "zarr", marker = "extra == 'codec'", specifier = ">=2.18.2" }, ] -provides-extras = ["all", "codec", "dask", "fsspec", "grids", "xarray"] +provides-extras = ["all", "codec", "dask", "fsspec", "grids", "pyspark", "xarray"] [package.metadata.requires-dev] dev = [ @@ -1650,6 +1660,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" }, ] +[[package]] +name = "py4j" +version = "0.10.9.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/31/0b210511177070c8d5d3059556194352e5753602fa64b85b7ab81ec1a009/py4j-0.10.9.9.tar.gz", hash = "sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879", size = 761089, upload-time = "2025-01-15T03:53:18.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/db/ea0203e495be491c85af87b66e37acfd3bf756fd985f87e46fc5e3bf022c/py4j-0.10.9.9-py2.py3-none-any.whl", hash = "sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533", size = 203008, upload-time = "2025-01-15T03:53:15.648Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -1781,6 +1800,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/73/a7141a1a0559bf1a7aa42a11c879ceb19f02f5c6c371c6d57fd86cefd4d1/pyproj-3.7.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d9d25bae416a24397e0d85739f84d323b55f6511e45a522dd7d7eae70d10c7e4", size = 6391844, upload-time = "2025-08-14T12:05:40.745Z" }, ] +[[package]] +name = "pyspark" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py4j" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/bf/58ee13add151469c25825b7125bbf62c3bdcec05eec4d458fcb5c5516066/pyspark-4.1.1.tar.gz", hash = "sha256:77f78984aa84fbe865c717dd37b49913b4e5c97d76ef6824f932f1aefa6621ec", size = 455359625, upload-time = "2026-01-09T09:38:38.28Z" } + [[package]] name = "pytest" version = "9.0.3"