diff --git a/docs/releases.md b/docs/releases.md index 338b8eda..2e3a9bb5 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -8,6 +8,10 @@ ### Bug fixes +- Fix setting `fill_value` for Zarr V2 arrays if data type is a subtype of integer or float. + ([#845](https://github.com/zarr-developers/VirtualiZarr/pull/845)). + By [Hauke Schulz](https://github.com/observingClouds). + ### Documentation ### Internal changes diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index 0c74077f..967ea986 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -207,7 +207,7 @@ def __eq__( # type: ignore[override] def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ManifestArray": """Cannot change the dtype, but needed because xarray will call this even when it's a no-op.""" - if dtype != self.dtype: + if not np.issubdtype(self.dtype, dtype): raise NotImplementedError() else: return self diff --git a/virtualizarr/parsers/dmrpp.py b/virtualizarr/parsers/dmrpp.py index 43c6425c..3b36e6a8 100644 --- a/virtualizarr/parsers/dmrpp.py +++ b/virtualizarr/parsers/dmrpp.py @@ -178,7 +178,7 @@ def parse_dataset( ) manifest_group = self._parse_dataset(dataset_element) - registry = ObjectStoreRegistry() + registry: ObjectStoreRegistry = ObjectStoreRegistry() registry.register(self.data_filepath, object_store) return ManifestStore(registry=registry, group=manifest_group) diff --git a/virtualizarr/parsers/zarr.py b/virtualizarr/parsers/zarr.py index 90922037..b4bd0a3b 100644 --- a/virtualizarr/parsers/zarr.py +++ b/virtualizarr/parsers/zarr.py @@ -4,11 +4,12 @@ from abc import ABC, abstractmethod from collections.abc import Iterable from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast import zarr from obspec_utils.registry import ObjectStoreRegistry from zarr.api.asynchronous import open_group as open_group_async +from zarr.core.dtype import parse_dtype from zarr.core.group import GroupMetadata from zarr.core.metadata import ArrayV3Metadata from zarr.storage import ObjectStore @@ -194,20 +195,11 @@ def get_metadata(self, zarr_array: ZarrArrayType) -> ArrayV3Metadata: if v2_metadata.fill_value is None: v2_dict = v2_metadata.to_dict() - v2_dict["fill_value"] = 0 + v2_dtype = parse_dtype(cast(Any, v2_dict["dtype"]), zarr_format=2) + fill_value = v2_dtype.default_scalar() + v2_dict["fill_value"] = v2_dtype.to_json_scalar(fill_value, zarr_format=2) temp_v2 = ArrayV2Metadata.from_dict(v2_dict) v3_metadata = _convert_array_metadata(temp_v2) - - # Replace with proper default for the data type - default_scalar = v3_metadata.data_type.default_scalar() - fill_value = ( - default_scalar.item() - if hasattr(default_scalar, "item") - else default_scalar - ) - v3_dict = v3_metadata.to_dict() - v3_dict["fill_value"] = fill_value - v3_metadata = ArrayV3Metadata.from_dict(v3_dict) else: # Normal conversion; allow other errors to propagate. v3_metadata = _convert_array_metadata(v2_metadata) diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 631440be..c3e3cdfa 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -131,6 +131,43 @@ def test_equals_nan_fill_value(self, array_v3_metadata): assert result.all() +class TestAstype: + def test_astype_same_dtype(self, manifest_array): + """Test that astype with the same dtype returns self.""" + marr = manifest_array( + shape=(5, 10), chunks=(5, 10), data_type=np.dtype("int32") + ) + result = marr.astype(np.dtype("int32")) + assert result is marr + + def test_astype_string_upcast(self, manifest_array): + """Test that astype allows string dtype upcasting (e.g., ObjectStore: def manifest_store_from_hdf_url(url, group: str | None = None): - registry = ObjectStoreRegistry() + registry: ObjectStoreRegistry = ObjectStoreRegistry() registry.register(url, obstore_local(url=url)) parser = HDFParser(group=group) return parser(url=url, registry=registry)