diff --git a/pyproject.toml b/pyproject.toml index 1aa8c71..7229ae4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,13 +37,15 @@ codec = [ xarray = ["xarray>=2023.1.0"] fsspec = ["fsspec>=2023.1.0", "s3fs>=2023.1.0"] grids = ["pyproj>=3.1.0"] +dask = ["dask[array]>=2023.1.0"] all = [ "zarr>=2.18.2", "numcodecs>=0.12.1", "xarray>=2023.1.0", "fsspec>=2023.10.0", "s3fs>=2023.1.0", - "pyproj>=3.3.0" + "pyproj>=3.3.0", + "dask[array]>=2023.1.0", ] [dependency-groups] diff --git a/python/omfiles/_rust/__init__.pyi b/python/omfiles/_rust/__init__.pyi index 5ecfff6..17e60f1 100644 --- a/python/omfiles/_rust/__init__.pyi +++ b/python/omfiles/_rust/__init__.pyi @@ -573,6 +573,46 @@ class OmFileWriter: Raises: ValueError: If the data type is unsupported or if parameters are invalid """ + def write_array_streaming( + self, + dimensions: typing.Sequence[builtins.int], + chunks: typing.Sequence[builtins.int], + chunk_iterator: typing.Iterator, + dtype: numpy.dtype, + scale_factor: typing.Optional[builtins.float] = None, + add_offset: typing.Optional[builtins.float] = None, + compression: typing.Optional[builtins.str] = None, + name: typing.Optional[builtins.str] = None, + children: typing.Optional[typing.Sequence[OmVariable]] = None, + ) -> OmVariable: + r""" + Write an array to the .om file by streaming chunks from a Python iterator. + + This method is designed for writing large arrays that do not fit in memory. + Instead of providing the full array, you provide the full array dimensions + and an iterator that yields numpy array chunks. + + Chunks MUST be yielded in row-major order (C-order) of the chunk grid. + Each chunk's shape determines how many internal file chunks it covers. + + Args: + dimensions: Shape of the full array (e.g., [1000, 2000]) + chunks: Chunk sizes for each dimension (e.g., [100, 200]) + chunk_iterator: Python iterable yielding numpy arrays, one per chunk region + dtype: Numpy dtype of the array (e.g., np.dtype(np.float32)) + scale_factor: Scale factor for data compression (default: 1.0) + add_offset: Offset value for data compression (default: 0.0) + compression: Compression algorithm to use (default: "pfor_delta_2d") + name: Name of the variable (default: "data") + children: List of child variables (default: []) + + Returns: + :py:data:`omfiles.OmVariable` representing the written array in the file structure + + Raises: + ValueError: If the dtype is unsupported or parameters are invalid + RuntimeError: If there's an error during compression or I/O + """ def write_scalar( self, value: typing.Any, name: builtins.str, children: typing.Optional[typing.Sequence[OmVariable]] = None ) -> OmVariable: diff --git a/python/omfiles/dask.py b/python/omfiles/dask.py new file mode 100644 index 0000000..ffcc303 --- /dev/null +++ b/python/omfiles/dask.py @@ -0,0 +1,139 @@ +"""Dask array integration for writing to OM files.""" + +import math +from typing import Iterator, Optional, Sequence + +import numpy as np + +from omfiles._rust import OmFileWriter, OmVariable + +try: + import dask.array as da +except ImportError: + raise ImportError("omfiles[dask] is required for dask functionality") + + +def _validate_chunk_alignment( + data_chunks: tuple, + om_chunks: list[int], + array_shape: tuple, +) -> None: + """ + Validate dask chunks are compatible with OM chunks for block-level streaming. + + Every non-last dask chunk along each dimension must be an exact multiple + of the corresponding OM chunk size (the last chunk may be smaller). 
+ Additionally, for the leftmost dimension where a dask block contains more + than one OM chunk, every trailing dimension must be fully covered by each + dask block. This ensures the local chunk traversal inside a block matches + the global file order. + """ + ndim = len(om_chunks) + + for d in range(ndim): + dim_chunks = data_chunks[d] + for i, c in enumerate(dim_chunks[:-1]): + if c % om_chunks[d] != 0: + raise ValueError( + f"Dask chunk size {c} along dimension {d} (block {i}) " + f"is not a multiple of the OM chunk size {om_chunks[d]}." + ) + + first_multi = None + for d in range(ndim): + local_n = max(math.ceil(c / om_chunks[d]) for c in data_chunks[d]) + if local_n > 1: + first_multi = d + break + + if first_multi is not None: + for d in range(first_multi + 1, ndim): + dim_chunks = data_chunks[d] + if not (len(dim_chunks) == 1 and dim_chunks[0] == array_shape[d]): + raise ValueError( + f"Dask blocks have multiple OM chunks in dimension {first_multi}, " + f"but dimension {d} is not fully covered by each dask block " + f"(dask chunks {dim_chunks} vs array size {array_shape[d]}). " + f"Rechunk so trailing dimensions are fully covered." + ) + + +def _dask_block_iterator(dask_array: da.Array) -> Iterator[np.ndarray]: + """ + Yield computed numpy arrays from a dask array in C-order block traversal. + + The OM file format requires chunks to be written in sequential order + corresponding to a row-major (C-order) traversal of the chunk grid. + np.ndindex yields indices in C-order: the last axis index varies fastest. + """ + for block_indices in np.ndindex(*dask_array.numblocks): + yield dask_array.blocks[block_indices].compute() + + +def write_dask_array( + writer: OmFileWriter, + data: da.Array, + chunks: Optional[Sequence[int]] = None, + scale_factor: float = 1.0, + add_offset: float = 0.0, + compression: str = "pfor_delta_2d", + name: str = "data", + children: Optional[Sequence[OmVariable]] = None, +) -> OmVariable: + """ + Write a dask array to an OM file using streaming/incremental writes. + + Iterates over the blocks of the dask array, computing each block + on-the-fly, and streams them to the OM file writer. Only one block + is held in memory at a time. + + The dask array's chunk structure is used to determine the OM file's + chunk dimensions by default. Dask chunks must be multiples of the OM + chunk sizes (except the last chunk along each dimension which may be + smaller). When a dask block contains more than one OM chunk in a + dimension, all trailing dimensions must be fully covered by each block. + + Performance: write speed depends on the number of dask tasks, not just + data size. For best performance, use dask chunks much larger than the + OM chunk sizes — ideally covering the full extent of trailing dimensions. + For example, with OM chunks of (124, 124) on an (8192, 8192) array, + dask chunks of (124, 8192) will write ~10x faster than (124, 124). + + Args: + writer: An open OmFileWriter instance. + data: A dask array to write. + chunks: OM file chunk sizes per dimension. If None, uses the dask + array's chunk sizes. Dask chunks must be multiples of these. + scale_factor: Scale factor for float compression (default: 1.0). + add_offset: Offset for float compression (default: 0.0). + compression: Compression algorithm (default: "pfor_delta_2d"). + name: Variable name (default: "data"). + children: Child variables (default: None). + + Returns: + OmVariable representing the written array. + + Raises: + TypeError: If data is not a dask array. 
+ ValueError: If dask chunks are incompatible with OM chunks. + """ + if not isinstance(data, da.Array): + raise TypeError(f"Expected a dask array, got {type(data)}") + + if chunks is not None and len(chunks) != data.ndim: + raise ValueError(f"chunks has {len(chunks)} element(s) but data has {data.ndim} dimension(s).") + + om_chunks: list[int] = list(chunks) if chunks is not None else [c[0] for c in data.chunks] + _validate_chunk_alignment(data.chunks, om_chunks, data.shape) + + return writer.write_array_streaming( + dimensions=[int(d) for d in data.shape], + chunks=om_chunks, + chunk_iterator=_dask_block_iterator(data), + dtype=data.dtype, + scale_factor=scale_factor, + add_offset=add_offset, + compression=compression, + name=name, + children=list(children) if children is not None else [], + ) diff --git a/src/writer.rs b/src/writer.rs index 37ba160..6893379 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -9,12 +9,13 @@ use numpy::{ }; use omfiles_rs::{ traits::{OmFileArrayDataType, OmFileScalarDataType, OmFileWriterBackend}, - writer::{OmFileWriter as OmFileWriterRs, OmFileWriterArrayFinalized}, + writer::{OmFileWriter as OmFileWriterRs, OmFileWriterArray}, OmCompressionType, OmFilesError, OmOffsetSize, }; use pyo3::{ - exceptions::{PyRuntimeError, PyValueError}, + exceptions::{PyRuntimeError, PyStopIteration, PyValueError}, prelude::*, + types::PyIterator, }; use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; use std::{ @@ -22,6 +23,133 @@ use std::{ sync::{Mutex, PoisonError}, }; +/// Helper to convert OmOffsetSize to OmVariable +fn to_variable(name: &str, os: OmOffsetSize) -> OmVariable { + OmVariable { + name: name.to_string(), + offset: os.offset, + size: os.size, + } +} + +/// All array element types supported by the writer. +enum OmElementType { + Float32, + Float64, + Int8, + Uint8, + Int16, + Uint16, + Int32, + Uint32, + Int64, + Uint64, +} + +impl OmElementType { + /// Resolve from a numpy `PyArrayDescr` (used by `write_array`). + fn from_numpy_dtype(d: &Bound<'_, PyArrayDescr>) -> PyResult { + let py = d.py(); + if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Float32) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Float64) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Int8) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Uint8) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Int16) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Uint16) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Int32) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Uint32) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Int64) + } else if d.is_equiv_to(&dtype::(py)) { + Ok(Self::Uint64) + } else { + Err(OmFileWriter::unsupported_array_type_error(d.clone())) + } + } +} + +/// Abstracts over the two write strategies (full array vs streaming iterator). +enum Feeder<'a, 'py> { + /// Feeds an entire numpy array in a single `write_data` call. + Full { + data: &'a Bound<'py, PyUntypedArray>, + }, + /// Feeds data chunk-by-chunk from a Python iterator. 
+ Streaming { + py: Python<'py>, + iter: Bound<'py, PyAny>, + }, +} + +impl<'a, 'py> Feeder<'a, 'py> { + fn feed( + self, + w: &mut OmFileWriterArray<'_, T, WriterBackendImpl>, + ) -> PyResult<()> { + match self { + Feeder::Full { data } => { + let array = data.cast::>()?.readonly(); + w.write_data(array.as_array(), None, None) + .map_err(convert_omfilesrs_error) + } + Feeder::Streaming { py, iter } => loop { + match iter.call_method0("__next__") { + Ok(item) => { + let array: PyReadonlyArrayDyn<'_, T> = item.extract()?; + w.write_data(array.as_array(), None, None) + .map_err(convert_omfilesrs_error)?; + } + Err(err) if err.is_instance_of::(py) => break Ok(()), + Err(err) => break Err(err), + } + }, + } + } +} + +/// Resolved parameters shared by both `write_array` and `write_array_streaming`. +struct WriteArrayParams<'a> { + name: &'a str, + children: Vec, + scale_factor: f32, + add_offset: f32, + compression: OmCompressionType, +} + +impl<'a> WriteArrayParams<'a> { + fn from_options( + name: Option<&'a str>, + children: Option>, + scale_factor: Option, + add_offset: Option, + compression: Option<&str>, + ) -> PyResult { + Ok(Self { + name: name.unwrap_or("data"), + children: children + .unwrap_or_default() + .iter() + .map(Into::into) + .collect(), + scale_factor: scale_factor.unwrap_or(1.0), + add_offset: add_offset.unwrap_or(0.0), + compression: compression + .map(|s| PyCompressionType::from_str(s)) + .transpose()? + .unwrap_or(PyCompressionType::PforDelta2d) + .to_omfilesrs(), + }) + } +} + /// A Python wrapper for the Rust OmFileWriter implementation. #[gen_stub_pyclass] #[pyclass] @@ -55,66 +183,80 @@ impl OmFileWriter { PyErr::new::(format!("Unsupported scalar data type: {}", type_name)) } - // Helper method for safe writer access + /// Helper method for safe writer access. fn with_writer(&self, f: F) -> PyResult where F: FnOnce(&mut OmFileWriterRs) -> PyResult, { let mut guard = self.writer.lock().map_err(|e| Self::lock_error(e))?; - match guard.as_mut() { Some(writer) => f(writer), None => Err(Self::closed_error()), } } - fn write_array_internal<'py, T>( - &mut self, - data: PyReadonlyArrayDyn<'py, T>, + /// Unified 10-way type dispatch. + /// + /// Resolves `element_type` to a concrete `T`, prepares a typed array writer, + /// feeds data, finalizes, and registers the result as a named variable. + /// Both `write_array` and `write_array_streaming` delegate here. + fn write_array_dispatched( + &self, + element_type: OmElementType, + dimensions: Vec, chunks: Vec, - scale_factor: f32, - add_offset: f32, - compression: OmCompressionType, - ) -> PyResult - where - T: Element + OmFileArrayDataType, - { - let dimensions = data - .shape() - .into_iter() - .map(|x| *x as u64) - .collect::>(); - + params: &WriteArrayParams<'_>, + feeder: Feeder<'_, '_>, + ) -> PyResult { self.with_writer(|writer| { - let mut array_writer = writer - .prepare_array::(dimensions, chunks, compression, scale_factor, add_offset) - .map_err(convert_omfilesrs_error)?; + macro_rules! dispatch { + ($($variant:ident => $T:ty),+ $(,)?) => { + match element_type { + $(OmElementType::$variant => { + let mut w = writer + .prepare_array::<$T>( + dimensions, chunks, params.compression, + params.scale_factor, params.add_offset, + ) + .map_err(convert_omfilesrs_error)?; + feeder.feed::<$T>(&mut w)?; + w.finalize() + }),+ + } + }; + } - array_writer - .write_data(data.as_array(), None, None) - .map_err(convert_omfilesrs_error)?; + let array_meta = dispatch! 
{ + Float32 => f32, + Float64 => f64, + Int8 => i8, + Uint8 => u8, + Int16 => i16, + Uint16 => u16, + Int32 => i32, + Uint32 => u32, + Int64 => i64, + Uint64 => u64, + }; - let variable_meta = array_writer.finalize(); - Ok(variable_meta) + writer + .write_array(array_meta, params.name, ¶ms.children) + .map_err(convert_omfilesrs_error) + .map(|os| to_variable(params.name, os)) }) } fn store_scalar( - &mut self, + &self, value: T, name: &str, children: &[OmOffsetSize], ) -> PyResult { self.with_writer(|writer| { - let offset_size = writer + writer .write_scalar(value, name, children) - .map_err(convert_omfilesrs_error)?; - - Ok(OmVariable { - name: name.to_string(), - offset: offset_size.offset, - size: offset_size.size, - }) + .map_err(convert_omfilesrs_error) + .map(|os| to_variable(name, os)) }) } } @@ -180,8 +322,9 @@ impl OmFileWriter { let mut guard = self.writer.lock().map_err(|e| Self::lock_error(e))?; if let Some(writer) = guard.as_mut() { - let result = writer.write_trailer(root_variable.into()); - result.map_err(convert_omfilesrs_error)?; + writer + .write_trailer(root_variable.into()) + .map_err(convert_omfilesrs_error)?; // Take ownership and drop to ensure proper file closure guard.take(); } else { @@ -195,7 +338,6 @@ impl OmFileWriter { #[getter] fn closed(&self) -> PyResult { let guard = self.writer.lock().map_err(|e| Self::lock_error(e))?; - Ok(guard.is_none()) } @@ -237,69 +379,66 @@ impl OmFileWriter { name: Option<&str>, children: Option>, ) -> PyResult { - let name = name.unwrap_or("data"); - let children: Vec = children - .unwrap_or_default() - .iter() - .map(Into::into) - .collect(); + let params = + WriteArrayParams::from_options(name, children, scale_factor, add_offset, compression)?; + let element_type = OmElementType::from_numpy_dtype(&data.dtype())?; + let dimensions = data.shape().iter().map(|x| *x as u64).collect(); + let feeder = Feeder::Full { data }; - let element_type = data.dtype(); - let py = data.py(); - - let scale_factor = scale_factor.unwrap_or(1.0); - let add_offset = add_offset.unwrap_or(0.0); - let compression = compression - .map(|s| PyCompressionType::from_str(s)) - .transpose()? 
- .unwrap_or(PyCompressionType::PforDelta2d) - .to_omfilesrs(); - - let array_meta = if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else if element_type.is_equiv_to(&dtype::(py)) { - let array = data.cast::>()?.readonly(); - self.write_array_internal(array, chunks, scale_factor, add_offset, compression) - } else { - Err(Self::unsupported_array_type_error(element_type)) - }?; + self.write_array_dispatched(element_type, dimensions, chunks, ¶ms, feeder) + } - self.with_writer(|writer| { - let offset_size = writer - .write_array(array_meta, name, &children) - .map_err(convert_omfilesrs_error)?; + /// Write an array to the .om file by streaming chunks from a Python iterator. + /// + /// This method is designed for writing large arrays that do not fit in memory. + /// Instead of providing the full array, you provide the full array dimensions + /// and an iterator that yields numpy array chunks. + /// + /// Chunks MUST be yielded in row-major order (C-order) of the chunk grid. + /// Each chunk's shape determines how many internal file chunks it covers. 
+ /// + /// Args: + /// dimensions: Shape of the full array (e.g., [1000, 2000]) + /// chunks: Chunk sizes for each dimension (e.g., [100, 200]) + /// chunk_iterator: Python iterable yielding numpy arrays, one per chunk region + /// dtype: Numpy dtype of the array (e.g., np.dtype(np.float32)) + /// scale_factor: Scale factor for data compression (default: 1.0) + /// add_offset: Offset value for data compression (default: 0.0) + /// compression: Compression algorithm to use (default: "pfor_delta_2d") + /// name: Name of the variable (default: "data") + /// children: List of child variables (default: []) + /// + /// Returns: + /// :py:data:`omfiles.OmVariable` representing the written array in the file structure + /// + /// Raises: + /// ValueError: If the dtype is unsupported or parameters are invalid + /// RuntimeError: If there's an error during compression or I/O + #[pyo3( + text_signature = "(dimensions, chunks, chunk_iterator, dtype, scale_factor=1.0, add_offset=0.0, compression='pfor_delta_2d', name='data', children=[])", + signature = (dimensions, chunks, chunk_iterator, dtype, scale_factor=None, add_offset=None, compression=None, name=None, children=None) + )] + fn write_array_streaming<'py>( + &mut self, + py: Python<'_>, + dimensions: Vec, + chunks: Vec, + #[gen_stub(override_type(type_repr="typing.Iterator", imports=("typing")))] + chunk_iterator: &Bound<'_, PyIterator>, + dtype: &Bound<'py, PyArrayDescr>, + scale_factor: Option, + add_offset: Option, + compression: Option<&str>, + name: Option<&str>, + children: Option>, + ) -> PyResult { + let params = + WriteArrayParams::from_options(name, children, scale_factor, add_offset, compression)?; + let element_type = OmElementType::from_numpy_dtype(dtype)?; + let iter = chunk_iterator.call_method0("__iter__")?; + let feeder = Feeder::Streaming { py, iter }; - Ok(OmVariable { - name: name.to_string(), - offset: offset_size.offset, - size: offset_size.size, - }) - }) + self.write_array_dispatched(element_type, dimensions, chunks, ¶ms, feeder) } /// Write a scalar value to the .om file. 
@@ -406,15 +545,10 @@ impl OmFileWriter { let children: Vec = children.iter().map(Into::into).collect(); self.with_writer(|writer| { - let offset_size = writer + writer .write_none(name, &children) - .map_err(convert_omfilesrs_error)?; - - Ok(OmVariable { - name: name.to_string(), - offset: offset_size.offset, - size: offset_size.size, - }) + .map_err(convert_omfilesrs_error) + .map(|os| to_variable(name, os)) }) } } diff --git a/tests/conftest.py b/tests/conftest.py index fc984eb..6a614c8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,6 +97,9 @@ def empty_temp_om_file(): warnings.warn(f"Failed to remove temporary file {filename}: {e}") +empty_temp_om_file_2 = empty_temp_om_file + + @pytest.fixture def icon_d2_meta_json() -> str: # return meta_str diff --git a/tests/test_chunk_reader.py b/tests/test_chunk_reader.py index 093558e..ad779f0 100644 --- a/tests/test_chunk_reader.py +++ b/tests/test_chunk_reader.py @@ -92,7 +92,7 @@ def test_load_data_success(chunk_reader: OmChunkFileReader, icond2_om_chunks_met for chunk_idx in chunk_reader.chunk_indices: chunk_times = icond2_om_chunks_meta.get_chunk_time_range(chunk_idx) time_mask = (chunk_times >= chunk_reader.start_date) & (chunk_times <= chunk_reader.end_date) - num_points = np.sum(time_mask) + num_points = int(np.sum(time_mask)) # Create mock data with the correct length expected_data.append(np.arange(num_points, dtype=np.float32)) diff --git a/tests/test_dask.py b/tests/test_dask.py new file mode 100644 index 0000000..ea882f3 --- /dev/null +++ b/tests/test_dask.py @@ -0,0 +1,195 @@ +import dask.array as da +import numpy as np +import pytest +from omfiles import OmFileReader, OmFileWriter +from omfiles.dask import write_dask_array + + +@pytest.fixture +def dask_array_2d(): + np_data = np.arange(200, dtype=np.float32).reshape(10, 20) + return da.from_array(np_data, chunks=(5, 10)) # type: ignore[arg-type] + + +@pytest.fixture +def dask_array_3d(): + np_data = np.arange(192, dtype=np.int32).reshape(4, 6, 8) + return da.from_array(np_data, chunks=(2, 3, 4)) # type: ignore[arg-type] + + +def test_dask_roundtrip_2d(empty_temp_om_file, dask_array_2d): + expected = dask_array_2d.compute() + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, dask_array_2d, scale_factor=10000.0) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, expected, decimal=4) + + +def test_dask_roundtrip_3d(empty_temp_om_file, dask_array_3d): + expected = dask_array_3d.compute() + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, dask_array_3d) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_equal(result, expected) + + +def test_dask_boundary_chunks(empty_temp_om_file): + np_data = np.arange(91, dtype=np.float32).reshape(7, 13) + darr = da.from_array(np_data, chunks=(4, 5)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, darr, scale_factor=10000.0) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, np_data, decimal=4) + + +def test_dask_custom_name(empty_temp_om_file, dask_array_2d): + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, dask_array_2d, scale_factor=10000.0, name="temperature") + assert var.name == "temperature" + writer.close(var) + 
+ +def test_dask_non_multiple_chunks_raises(empty_temp_om_file): + """Dask chunks that aren't multiples of OM chunks should raise.""" + np_data = np.arange(30, dtype=np.float32).reshape(6, 5) + darr = da.from_array(np_data, chunks=(3, 5)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(ValueError, match="not a multiple"): + write_dask_array(writer, darr, chunks=[2, 5]) + + +def test_dask_larger_chunks_than_om_2d(empty_temp_om_file): + """Dask blocks spanning multiple OM chunks along dim 1 (full trailing dim).""" + np_data = np.arange(200, dtype=np.float32).reshape(10, 20) + darr = da.from_array(np_data, chunks=(10, 20)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, darr, chunks=[5, 10], scale_factor=10000.0) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, np_data, decimal=4) + + +def test_dask_larger_chunks_than_om_3d(empty_temp_om_file): + """Dask blocks with full trailing dims, multiple OM chunks in dim 0.""" + np_data = np.arange(192, dtype=np.int32).reshape(4, 6, 8) + darr = da.from_array(np_data, chunks=(4, 6, 8)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, darr, chunks=[2, 3, 4]) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_equal(result, np_data) + + +def test_dask_single_om_chunk_per_slow_dim(empty_temp_om_file): + """Dask blocks with 1 OM chunk in dim 0, partial trailing dim coverage.""" + np_data = np.arange(200, dtype=np.float32).reshape(10, 20) + darr = da.from_array(np_data, chunks=(5, 10)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, darr, chunks=[5, 5], scale_factor=10000.0) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, np_data, decimal=4) + + +def test_dask_misaligned_trailing_dims_raises(empty_temp_om_file): + np_data = np.arange(200, dtype=np.float32).reshape(10, 20) + darr = da.from_array(np_data, chunks=(10, 10)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(ValueError, match="not fully covered"): + write_dask_array(writer, darr, chunks=[5, 5]) + + +def test_dask_not_a_dask_array_raises(empty_temp_om_file): + np_data = np.arange(20, dtype=np.float32).reshape(4, 5) + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(TypeError, match="Expected a dask array"): + write_dask_array(writer, np_data) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + "bad_chunks", + [ + pytest.param([5], id="too_few"), + pytest.param([5, 10, 4], id="too_many"), + ], +) +def test_dask_chunk_ndim_mismatch_raises(empty_temp_om_file, bad_chunks): + np_data = np.arange(200, dtype=np.float32).reshape(10, 20) + darr = da.from_array(np_data, chunks=(5, 10)) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(ValueError, match=r"chunks has \d+ element"): + write_dask_array(writer, darr, chunks=bad_chunks) + + +def test_dask_irregular_chunks_misaligned_raises(empty_temp_om_file): + """ + Non-first dask block spans multiple OM chunks while trailing dim is not + fully covered. 
+ + Array (12, 16), dask chunks ((4, 8), (8, 8)), OM chunks [4, 8]: + block (1,0) shape (8, 8) → 2 OM rows but only 1 of 2 OM columns. + """ + np_data = np.arange(192, dtype=np.float32).reshape(12, 16) + darr = da.from_array(np_data, chunks=((4, 8), (8, 8))) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(ValueError, match="not fully covered"): + write_dask_array(writer, darr, chunks=[4, 8]) + + +def test_dask_irregular_chunks_valid_roundtrip(empty_temp_om_file): + """ + Non-first dask block spans multiple OM chunks but trailing dim IS fully + covered — this configuration is valid and must produce correct output. + + Array (12, 16), dask chunks ((4, 8), (16,)), OM chunks [4, 8]: + block (1,0) shape (8, 16) → 2 OM rows and all OM columns — safe. + """ + np_data = np.arange(192, dtype=np.float32).reshape(12, 16) + darr = da.from_array(np_data, chunks=((4, 8), (16,))) # type: ignore[arg-type] + + writer = OmFileWriter(empty_temp_om_file) + var = write_dask_array(writer, darr, chunks=[4, 8], scale_factor=10000.0) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, np_data, decimal=4) diff --git a/tests/test_fsspec.py b/tests/test_fsspec.py index 9e50cfa..31edebf 100644 --- a/tests/test_fsspec.py +++ b/tests/test_fsspec.py @@ -2,7 +2,6 @@ import os import tempfile import threading -from typing import Tuple import fsspec import numpy as np diff --git a/tests/test_read_write.py b/tests/test_read_write.py index f6daeda..e658a08 100644 --- a/tests/test_read_write.py +++ b/tests/test_read_write.py @@ -28,19 +28,19 @@ def test_round_trip_array_datatypes(): shape = (5, 5, 5, 2) chunks = [2, 2, 2, 1] test_cases = [ - (np.random.rand(*shape).astype(np.float32), "float32"), - (np.random.rand(*shape).astype(np.float64), "float64"), - (np.random.randint(-128, 127, size=shape, dtype=np.int8), "int8"), - (np.random.randint(-32768, 32767, size=shape, dtype=np.int16), "int16"), - (np.random.randint(-2147483648, 2147483647, size=shape, dtype=np.int32), "int32"), - (np.random.randint(-9223372036854775808, 9223372036854775807, size=shape, dtype=np.int64), "int64"), - (np.random.randint(0, 255, size=shape, dtype=np.uint8), "uint8"), - (np.random.randint(0, 65535, size=shape, dtype=np.uint16), "uint16"), - (np.random.randint(0, 4294967295, size=shape, dtype=np.uint32), "uint32"), - (np.random.randint(0, 18446744073709551615, size=shape, dtype=np.uint64), "uint64"), + np.random.rand(*shape).astype(np.float32), + np.random.rand(*shape).astype(np.float64), + np.random.randint(-128, 127, size=shape, dtype=np.int8), + np.random.randint(-32768, 32767, size=shape, dtype=np.int16), + np.random.randint(-2147483648, 2147483647, size=shape, dtype=np.int32), + np.random.randint(-9223372036854775808, 9223372036854775807, size=shape, dtype=np.int64), + np.random.randint(0, 255, size=shape, dtype=np.uint8), + np.random.randint(0, 65535, size=shape, dtype=np.uint16), + np.random.randint(0, 4294967295, size=shape, dtype=np.uint32), + np.random.randint(0, 18446744073709551615, size=shape, dtype=np.uint64), ] - for test_data, dtype in test_cases: + for test_data in test_cases: with tempfile.NamedTemporaryFile(suffix=".om") as temp_file: writer = omfiles.OmFileWriter(temp_file.name) variable = writer.write_array(test_data, chunks=chunks, scale_factor=10000.0, add_offset=0.0) diff --git a/tests/test_streaming_write.py b/tests/test_streaming_write.py new file mode 100644 index 0000000..3cfbd96 
--- /dev/null +++ b/tests/test_streaming_write.py @@ -0,0 +1,220 @@ +import numpy as np +import pytest +from omfiles import OmFileReader, OmFileWriter + + +def test_streaming_single_chunk(empty_temp_om_file): + shape = (10, 20) + chunks = [10, 20] + data = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + + writer = OmFileWriter(empty_temp_om_file) + + def chunk_iter(): + yield data + + var = writer.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(np.float32), + scale_factor=10000.0, + ) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, data, decimal=4) + + +def test_streaming_multiple_chunks_2d(empty_temp_om_file): + shape = (10, 20) + chunks = [5, 10] + data = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + + writer = OmFileWriter(empty_temp_om_file) + + def chunk_iter(): + for i in range(0, 10, 5): + for j in range(0, 20, 10): + yield data[i : i + 5, j : j + 10].copy() + + var = writer.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(np.float32), + scale_factor=10000.0, + ) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, data, decimal=4) + + +def test_streaming_all_dtypes(empty_temp_om_file): + shape = (6, 8) + chunks = [3, 4] + dtypes = [ + np.float32, + np.float64, + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + ] + + for dt in dtypes: + if np.issubdtype(dt, np.floating): + data = np.random.rand(*shape).astype(dt) + elif np.issubdtype(dt, np.signedinteger): + info = np.iinfo(dt) + data = np.random.randint(max(info.min, -1000), min(info.max, 1000), size=shape, dtype=dt) + else: + info = np.iinfo(dt) + data = np.random.randint(0, min(info.max, 1000), size=shape, dtype=dt) + + writer = OmFileWriter(empty_temp_om_file) + + def chunk_iter(d=data): + for i in range(0, shape[0], chunks[0]): + for j in range(0, shape[1], chunks[1]): + ie = min(i + chunks[0], shape[0]) + je = min(j + chunks[1], shape[1]) + yield d[i:ie, j:je].copy() + + var = writer.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(dt), + scale_factor=10000.0, + ) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + assert result.dtype == dt, f"dtype mismatch for {dt}" + if np.issubdtype(dt, np.floating): + np.testing.assert_array_almost_equal(result, data, decimal=4) + else: + np.testing.assert_array_equal(result, data) + + +def test_streaming_3d_array(empty_temp_om_file): + shape = (4, 6, 8) + chunks = [2, 3, 4] + data = np.arange(np.prod(shape), dtype=np.int32).reshape(shape) + + writer = OmFileWriter(empty_temp_om_file) + + def chunk_iter(): + for i in range(0, shape[0], chunks[0]): + for j in range(0, shape[1], chunks[1]): + for k in range(0, shape[2], chunks[2]): + ie = min(i + chunks[0], shape[0]) + je = min(j + chunks[1], shape[1]) + ke = min(k + chunks[2], shape[2]) + yield data[i:ie, j:je, k:ke].copy() + + var = writer.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(np.int32), + ) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_equal(result, data) 
+ + +def test_streaming_boundary_chunks(empty_temp_om_file): + shape = (7, 13) + chunks = [4, 5] + data = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + + writer = OmFileWriter(empty_temp_om_file) + + def chunk_iter(): + for i in range(0, shape[0], chunks[0]): + for j in range(0, shape[1], chunks[1]): + ie = min(i + chunks[0], shape[0]) + je = min(j + chunks[1], shape[1]) + yield data[i:ie, j:je].copy() + + var = writer.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(np.float32), + scale_factor=10000.0, + ) + writer.close(var) + + reader = OmFileReader(empty_temp_om_file) + result = reader[:] + reader.close() + + np.testing.assert_array_almost_equal(result, data, decimal=4) + + +def test_streaming_matches_write_array(empty_temp_om_file, empty_temp_om_file_2): + shape = (10, 20) + chunks = [5, 10] + data = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) + + writer1 = OmFileWriter(empty_temp_om_file) + var1 = writer1.write_array(data, chunks=chunks, scale_factor=10000.0) + writer1.close(var1) + reader1 = OmFileReader(empty_temp_om_file) + result1 = reader1[:] + reader1.close() + + writer2 = OmFileWriter(empty_temp_om_file_2) + + def chunk_iter(): + for i in range(0, shape[0], chunks[0]): + for j in range(0, shape[1], chunks[1]): + ie = min(i + chunks[0], shape[0]) + je = min(j + chunks[1], shape[1]) + yield data[i:ie, j:je].copy() + + var2 = writer2.write_array_streaming( + dimensions=list(shape), + chunks=chunks, + chunk_iterator=chunk_iter(), + dtype=np.dtype(np.float32), + scale_factor=10000.0, + ) + writer2.close(var2) + reader2 = OmFileReader(empty_temp_om_file_2) + result2 = reader2[:] + reader2.close() + + np.testing.assert_array_equal(result1, result2) + + +def test_streaming_unsupported_dtype_raises(empty_temp_om_file): + writer = OmFileWriter(empty_temp_om_file) + with pytest.raises(ValueError, match="Unsupported array data type"): + writer.write_array_streaming( + dimensions=[10], + chunks=[5], + chunk_iterator=iter([]), + dtype=np.dtype(np.complex128), + ) diff --git a/uv.lock b/uv.lock index 10fd524..e226cbd 100644 --- a/uv.lock +++ b/uv.lock @@ -363,6 +363,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/68/687187c7e26cb24ccbd88e5069f5ef00eba804d36dde11d99aad0838ab45/charset_normalizer-3.4.6-py3-none-any.whl", hash = "sha256:947cf925bc916d90adba35a64c82aace04fa39b46b52d4630ece166655905a69", size = 61455, upload-time = "2026-03-15T18:53:23.833Z" }, ] +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "cloudpickle" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = 
"sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -372,6 +393,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "dask" +version = "2026.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "cloudpickle" }, + { name = "fsspec" }, + { name = "importlib-metadata", marker = "python_full_version < '3.12'" }, + { name = "packaging" }, + { name = "partd" }, + { name = "pyyaml" }, + { name = "toolz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d7/2a/5d8cc1579590af86576dde890254440e478c7174b93a02095ecfc2e6ba38/dask-2026.3.0.tar.gz", hash = "sha256:f7d96c8274e8a900d217c1ff6ea8d1bbf0b4c2c21e74a409644498d925eb8f85", size = 11000710, upload-time = "2026-03-18T07:10:14.945Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/f3/00bb1e867fba351e2d784170955713bee200c43ea306c59f30bd7e748192/dask-2026.3.0-py3-none-any.whl", hash = "sha256:be614b9242b0b38288060fb2d7696125946469c98a1c30e174883fd199e0428d", size = 1485630, upload-time = "2026-03-18T07:10:12.832Z" }, +] + +[package.optional-dependencies] +array = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] + [[package]] name = "docutils" version = "0.22.4" @@ -597,6 +643,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96", size = 9441, upload-time = "2026-03-03T14:18:27.892Z" }, ] +[[package]] +name = "importlib-metadata" +version = "8.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -627,6 +685,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = 
"sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, ] +[[package]] +name = "locket" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/83/97b29fe05cb6ae28d2dbd30b81e2e402a3eed5f460c26e9eaa5895ceacf5/locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632", size = 4350, upload-time = "2022-04-20T22:04:44.312Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl", hash = "sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3", size = 4398, upload-time = "2022-04-20T22:04:42.23Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1163,6 +1230,7 @@ dependencies = [ [package.optional-dependencies] all = [ + { name = "dask", extra = ["array"] }, { name = "fsspec" }, { name = "numcodecs", version = "0.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numcodecs", version = "0.16.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1180,6 +1248,9 @@ codec = [ { name = "zarr", version = "2.18.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "zarr", version = "3.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] +dask = [ + { name = "dask", extra = ["array"] }, +] fsspec = [ { name = "fsspec" }, { name = "s3fs" }, @@ -1226,6 +1297,8 @@ test = [ [package.metadata] requires-dist = [ + { name = "dask", extras = ["array"], marker = "extra == 'all'", specifier = ">=2023.1.0" }, + { name = "dask", extras = ["array"], marker = "extra == 'dask'", specifier = ">=2023.1.0" }, { name = "fsspec", marker = "extra == 'all'", specifier = ">=2023.10.0" }, { name = "fsspec", marker = "extra == 'fsspec'", specifier = ">=2023.1.0" }, { name = "numcodecs", marker = "extra == 'all'", specifier = ">=0.12.1" }, @@ -1240,7 +1313,7 @@ requires-dist = [ { name = "zarr", marker = "extra == 'all'", specifier = ">=2.18.2" }, { name = "zarr", marker = "extra == 'codec'", specifier = ">=2.18.2" }, ] -provides-extras = ["all", "codec", "fsspec", "grids", "xarray"] +provides-extras = ["all", "codec", "dask", "fsspec", "grids", "xarray"] [package.metadata.requires-dev] dev = [ @@ -1413,6 +1486,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/b0/34937815889fa982613775e4b97fddd13250f11012d769949c5465af2150/pandas-3.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:108dd1790337a494aa80e38def654ca3f0968cf4f362c85f44c15e471667102d", size = 9452085, upload-time = "2026-02-17T22:20:14.331Z" }, ] +[[package]] +name = "partd" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "locket" }, + { name = "toolz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/3a/3f06f34820a31257ddcabdfafc2672c5816be79c7e353b02c1f318daa7d4/partd-1.4.2.tar.gz", hash = "sha256:d022c33afbdc8405c226621b015e8067888173d85f7f5ecebb3cafed9a20f02c", size = 21029, upload-time = "2024-05-06T19:51:41.945Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = 
"sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905, upload-time = "2024-05-06T19:51:39.271Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -2135,6 +2221,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" }, ] +[[package]] +name = "toolz" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/d6/114b492226588d6ff54579d95847662fc69196bdeec318eb45393b24c192/toolz-1.1.0.tar.gz", hash = "sha256:27a5c770d068c110d9ed9323f24f1543e83b2f300a687b7891c1a6d56b697b5b", size = 52613, upload-time = "2025-10-17T04:03:21.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/12/5911ae3eeec47800503a238d971e51722ccea5feb8569b735184d5fcdbc0/toolz-1.1.0-py3-none-any.whl", hash = "sha256:15ccc861ac51c53696de0a5d6d4607f99c210739caf987b5d2054f3efed429d8", size = 58093, upload-time = "2025-10-17T04:03:20.435Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2475,3 +2570,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/fc/76/7fa87f57c112c7b9c wheels = [ { url = "https://files.pythonhosted.org/packages/44/15/bb13b4913ef95ad5448490821eee4671d0e67673342e4d4070854e5fe081/zarr-3.1.5-py3-none-any.whl", hash = "sha256:29cd905afb6235b94c09decda4258c888fcb79bb6c862ef7c0b8fe009b5c8563", size = 284067, upload-time = "2025-11-21T14:05:59.235Z" }, ] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +]