Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/releases.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

### Bug fixes

- Fix setting `fill_value` for Zarr V2 arrays if data type is a subtype of integer or float.
([#845](https://github.com/zarr-developers/VirtualiZarr/pull/845)).
By [Hauke Schulz](https://github.com/observingClouds).

### Documentation

### Internal changes
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/manifests/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def __eq__( # type: ignore[override]

def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ManifestArray":
"""Cannot change the dtype, but needed because xarray will call this even when it's a no-op."""
if dtype != self.dtype:
if not np.issubdtype(self.dtype, dtype):
raise NotImplementedError()
else:
return self
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/parsers/dmrpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def parse_dataset(
)

manifest_group = self._parse_dataset(dataset_element)
registry = ObjectStoreRegistry()
registry: ObjectStoreRegistry = ObjectStoreRegistry()
registry.register(self.data_filepath, object_store)

return ManifestStore(registry=registry, group=manifest_group)
Expand Down
18 changes: 5 additions & 13 deletions virtualizarr/parsers/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from abc import ABC, abstractmethod
from collections.abc import Iterable
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, cast

import zarr
from obspec_utils.registry import ObjectStoreRegistry
from zarr.api.asynchronous import open_group as open_group_async
from zarr.core.dtype import parse_dtype
from zarr.core.group import GroupMetadata
from zarr.core.metadata import ArrayV3Metadata
from zarr.storage import ObjectStore
Expand Down Expand Up @@ -194,20 +195,11 @@ def get_metadata(self, zarr_array: ZarrArrayType) -> ArrayV3Metadata:

if v2_metadata.fill_value is None:
v2_dict = v2_metadata.to_dict()
v2_dict["fill_value"] = 0
v2_dtype = parse_dtype(cast(Any, v2_dict["dtype"]), zarr_format=2)
fill_value = v2_dtype.default_scalar()
v2_dict["fill_value"] = v2_dtype.to_json_scalar(fill_value, zarr_format=2)
temp_v2 = ArrayV2Metadata.from_dict(v2_dict)
v3_metadata = _convert_array_metadata(temp_v2)

# Replace with proper default for the data type
default_scalar = v3_metadata.data_type.default_scalar()
fill_value = (
default_scalar.item()
if hasattr(default_scalar, "item")
else default_scalar
)
v3_dict = v3_metadata.to_dict()
v3_dict["fill_value"] = fill_value
v3_metadata = ArrayV3Metadata.from_dict(v3_dict)
else:
# Normal conversion; allow other errors to propagate.
v3_metadata = _convert_array_metadata(v2_metadata)
Expand Down
37 changes: 37 additions & 0 deletions virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,43 @@ def test_equals_nan_fill_value(self, array_v3_metadata):
assert result.all()


class TestAstype:
def test_astype_same_dtype(self, manifest_array):
"""Test that astype with the same dtype returns self."""
marr = manifest_array(
shape=(5, 10), chunks=(5, 10), data_type=np.dtype("int32")
)
result = marr.astype(np.dtype("int32"))
assert result is marr

def test_astype_string_upcast(self, manifest_array):
"""Test that astype allows string dtype upcasting (e.g., <U16 to <U)."""
marr = manifest_array(shape=(5, 10), chunks=(5, 10), data_type=np.dtype("<U16"))
# np.dtype("<U") is the generic unicode type without length specification
result = marr.astype(np.dtype("<U"))
assert result is marr

def test_astype_bytes_upcast(self, manifest_array):
"""Test that astype allows bytes dtype upcasting (e.g., |S1 to |S)."""
marr = manifest_array(shape=(5, 10), chunks=(5, 10), data_type=np.dtype("|S1"))
result = marr.astype(np.dtype("|S"))
assert result is marr

def test_astype_incompatible_dtype_raises(self, manifest_array):
"""Test that astype with incompatible dtype raises NotImplementedError."""
marr = manifest_array(
shape=(5, 10), chunks=(5, 10), data_type=np.dtype("int32")
)
with pytest.raises(NotImplementedError):
marr.astype(np.dtype("float64"))

def test_astype_string_to_int_raises(self, manifest_array):
"""Test that astype from string to int raises NotImplementedError."""
marr = manifest_array(shape=(5, 10), chunks=(5, 10), data_type=np.dtype("<U16"))
with pytest.raises(NotImplementedError):
marr.astype(np.dtype("int32"))


class TestBroadcast:
def test_broadcast_existing_axis(self, manifest_array):
marr = manifest_array(shape=(1, 2), chunks=(1, 2))
Expand Down
18 changes: 16 additions & 2 deletions virtualizarr/tests/test_parsers/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,21 @@ async def get_meta():


@SKIP_OLDER_ZARR_PYTHON
def test_v2_metadata_with_none_fill_value():
@pytest.mark.parametrize(
"dtype",
[
"int32",
"uint8",
"float64",
"bool",
"U10",
"datetime64[s]",
"timedelta64[s]",
"S10",
"V10",
],
)
def test_v2_metadata_with_none_fill_value(dtype):
"""Test V2 metadata conversion when fill_value is None."""
import asyncio

Expand All @@ -273,7 +287,7 @@ def test_v2_metadata_with_none_fill_value():
_ = zarr.create(
shape=(5, 10),
chunks=(5, 5),
dtype="int32",
dtype=dtype,
store=store,
zarr_format=2,
fill_value=None,
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def obstore_http(url: str) -> ObjectStore:


def manifest_store_from_hdf_url(url, group: str | None = None):
registry = ObjectStoreRegistry()
registry: ObjectStoreRegistry = ObjectStoreRegistry()
registry.register(url, obstore_local(url=url))
parser = HDFParser(group=group)
return parser(url=url, registry=registry)
Loading