From ddd433ad5fd0b0a5b68c6e325a31d2e6cabbefd1 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 27 Aug 2024 16:30:19 -0700 Subject: [PATCH 01/18] VectorData Refactor Expandable (#1158) * prior * chckpoint * possible poc * fix * finished tests * dtype * fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix again * fix again * clean up * coverage * Update CHANGELOG.md * Update CHANGELOG.md * Update h5tools.py * Update h5tools.py * clean up for review * clean up for review * clean up --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + src/hdmf/backends/hdf5/h5tools.py | 31 ++++++++--- src/hdmf/build/builders.py | 16 ++++-- src/hdmf/build/objectmapper.py | 88 +++++++++++++++++++----------- tests/unit/helpers/utils.py | 64 ++++++++++++++++++++++ tests/unit/test_io_hdf5_h5tools.py | 68 +++++++++++++++++++++-- 6 files changed, 221 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6369094..e0b52b201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) ## HDMF 3.14.3 (July 29, 2024) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 8135d75e7..273918840 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -383,7 +383,9 @@ def copy_file(self, **kwargs): 'default': True}, {'name': 'herd', 'type': 'hdmf.common.resources.HERD', 'doc': 'A HERD object to populate with references.', - 'default': None}) + 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -826,10 +828,15 @@ def close_linked_files(self): 'doc': 'exhaust DataChunkIterators one at a time. 
If False, exhaust them concurrently', 'default': True}, {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None}) + 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) - link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs) + link_data, exhaust_dci, export_source = getargs('link_data', + 'exhaust_dci', + 'export_source', + kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) for name, gbldr in f_builder.groups.items(): @@ -1000,6 +1007,8 @@ def _filler(): 'default': True}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}, returns='the Group that was created', rtype=Group) def write_group(self, **kwargs): parent, builder = popargs('parent', 'builder', kwargs) @@ -1100,6 +1109,8 @@ def write_link(self, **kwargs): 'default': True}, {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, + {'name': 'expandable', 'type': bool, 'default': True, + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}, returns='the Dataset that was created', rtype=Dataset) def write_dataset(self, **kwargs): # noqa: C901 """ Write a dataset to HDF5 @@ -1107,7 +1118,7 @@ def write_dataset(self, **kwargs): # noqa: C901 The function uses other dataset-dependent write functions, e.g, ``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data. """ - parent, builder = popargs('parent', 'builder', kwargs) + parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs) link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs) self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) if self.get_written(builder): @@ -1115,6 +1126,7 @@ def write_dataset(self, **kwargs): # noqa: C901 return None name = builder.name data = builder.data + matched_spec_shape = builder.spec_shapes dataio = None options = dict() # dict with additional if isinstance(data, H5DataIO): @@ -1228,7 +1240,7 @@ def _filler(): return # If the compound data type contains only regular data (i.e., no references) then we can write it as usual else: - dset = self.__list_fill__(parent, name, data, options) + dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options) # Write a dataset containing references, i.e., a region or object reference. # NOTE: we can ignore options['io_settings'] for scalar data elif self.__is_ref(options['dtype']): @@ -1323,7 +1335,7 @@ def _filler(): self.__dci_queue.append(dataset=dset, data=data) # Write a regular in memory array (e.g., numpy array, list etc.) 
elif hasattr(data, '__len__'): - dset = self.__list_fill__(parent, name, data, options) + dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options) # Write a regular scalar dataset else: dset = self.__scalar_fill__(parent, name, data, options) @@ -1451,7 +1463,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None): return dset @classmethod - def __list_fill__(cls, parent, name, data, options=None): + def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None): # define the io settings and data type if necessary io_settings = {} dtype = None @@ -1473,6 +1485,11 @@ def __list_fill__(cls, parent, name, data, options=None): data_shape = (len(data),) else: data_shape = get_data_shape(data) + if expandable: + # Don't override existing settings + if 'maxshape' not in io_settings: + if matched_spec_shape is not None: + io_settings['maxshape'] = matched_spec_shape # Create the dataset try: diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index cb658b6d4..6ed453166 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -330,6 +330,9 @@ class DatasetBuilder(BaseBuilder): 'doc': 'The datatype of this dataset.', 'default': None}, {'name': 'attributes', 'type': dict, 'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()}, + {'name': 'spec_shapes', 'type': tuple, + 'doc': ('The shape(s) defined in the spec.'), + 'default': None}, {'name': 'dimension_labels', 'type': tuple, 'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is ' 'supplied only on build.'), @@ -341,15 +344,15 @@ class DatasetBuilder(BaseBuilder): {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None}) def __init__(self, **kwargs): """ Create a Builder object for a dataset """ - name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs( - 'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source', - kwargs - ) + name, data, dtype, attributes, spec_shapes, dimension_labels, maxshape, chunks, parent, source = getargs( + 'name', 'data', 'dtype', 'attributes', 'spec_shapes', 'dimension_labels', 'maxshape', 'chunks', + 'parent', 'source', kwargs) super().__init__(name, attributes, parent, source) self['data'] = data self['attributes'] = _copy.copy(attributes) self.__dimension_labels = dimension_labels self.__chunks = chunks + self.__spec_shapes = spec_shapes self.__maxshape = maxshape if isinstance(data, BaseBuilder): if dtype is None: @@ -357,6 +360,11 @@ def __init__(self, **kwargs): self.__dtype = dtype self.__name = name + @property + def spec_shapes(self): + """The shapes defined in the spec.""" + return self.__spec_shapes + @property def data(self): """The data stored in the dataset represented by this builder.""" diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b5815ee2c..6dbfdb1d4 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -555,6 +555,7 @@ def get_attribute(self, **kwargs): def get_attr_value(self, **kwargs): ''' Get the value of the attribute corresponding to this spec from the given container ''' spec, container, manager = getargs('spec', 'container', 'manager', kwargs) + attr_name = self.get_attribute(spec) if attr_name is None: return None @@ -723,7 +724,10 @@ def build(self, **kwargs): msg = "'container' must be of type Data with DatasetSpec" raise ValueError(msg) 
spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext) - dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims) + dimension_labels, matched_shape = self.__get_spec_info(container.data, + spec_shape, + spec_dims, + spec_dtype) if isinstance(spec_dtype, RefSpec): self.logger.debug("Building %s '%s' as a dataset of references (source: %s)" % (container.__class__.__name__, container.name, repr(source))) @@ -734,6 +738,7 @@ def build(self, **kwargs): parent=parent, source=source, dtype=spec_dtype.reftype, + spec_shapes=matched_shape, dimension_labels=dimension_labels, ) manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager)) @@ -748,6 +753,7 @@ def build(self, **kwargs): parent=parent, source=source, dtype=spec_dtype, + spec_shapes=matched_shape, dimension_labels=dimension_labels, ) manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container, @@ -766,6 +772,7 @@ def build(self, **kwargs): parent=parent, source=source, dtype="object", + spec_shapes=matched_shape, dimension_labels=dimension_labels, ) manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager)) @@ -789,6 +796,7 @@ def build(self, **kwargs): parent=parent, source=source, dtype=dtype, + spec_shapes=matched_shape, dimension_labels=dimension_labels, ) @@ -820,10 +828,7 @@ def __check_dset_spec(self, orig, ext): spec = ext return dtype, shape, dims, spec - def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple: - if spec_shape is None or spec_dims is None: - return None - data_shape = get_data_shape(data) + def __get_matched_dimension(self, data_shape, spec_shape, spec_dtype=None): # if shape is a list of allowed shapes, find the index of the shape that matches the data if isinstance(spec_shape[0], list): match_shape_inds = list() @@ -839,37 +844,58 @@ def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple break if match: match_shape_inds.append(i) - # use the most specific match -- the one with the fewest Nones - if match_shape_inds: - if len(match_shape_inds) == 1: - return tuple(spec_dims[match_shape_inds[0]]) + return match_shape_inds + + def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None): + """This will return the dimension labels and shape by matching the data shape to a permissible spec shape.""" + if spec_shape is None and spec_dims is None: + return None, None + else: + if spec_dtype is not None and isinstance(spec_dtype, list): + data_shape = (len(data),) + else: + data_shape = get_data_shape(data) + + if isinstance(spec_shape[0], list): + match_shape_inds = self.__get_matched_dimension(data_shape, spec_shape, spec_dtype) + if len(match_shape_inds) == 0: + # no matches found + msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None, None + elif len(match_shape_inds) == 1: + if spec_dims is not None: + return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]]) + else: + return spec_dims, tuple(spec_shape[match_shape_inds[0]]) else: count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds] index_min_count = count_nones.index(min(count_nones)) best_match_ind = match_shape_inds[index_min_count] - return tuple(spec_dims[best_match_ind]) + if spec_dims is not None: + return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind]) + 
else: + return spec_dims, tuple(spec_shape[best_match_ind]) else: - # no matches found - msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path - warnings.warn(msg, IncorrectDatasetShapeBuildWarning) - return None - else: - if len(data_shape) != len(spec_shape): - msg = "Shape of data does not match shape in spec '%s'" % self.spec.path - warnings.warn(msg, IncorrectDatasetShapeBuildWarning) - return None - # check each dimension. None means any length is allowed - match = True - for j, d in enumerate(data_shape): - if spec_shape[j] is not None and spec_shape[j] != d: - match = False - break - if not match: - msg = "Shape of data does not match shape in spec '%s'" % self.spec.path - warnings.warn(msg, IncorrectDatasetShapeBuildWarning) - return None - # shape is a single list of allowed dimension lengths - return tuple(spec_dims) + if len(data_shape) != len(spec_shape): + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None, None + # check each dimension. None means any length is allowed + match = True + for j, d in enumerate(data_shape): + if spec_shape[j] is not None and spec_shape[j] != d: + match = False + break + if not match: + msg = "Shape of data does not match shape in spec '%s'" % self.spec.path + warnings.warn(msg, IncorrectDatasetShapeBuildWarning) + return None, None + # shape is a single list of allowed dimension lengths + if spec_dims is not None: + return tuple(spec_dims), tuple(spec_shape) + else: + return None, tuple(spec_shape) def __is_reftype(self, data): if (isinstance(data, AbstractDataChunkIterator) or diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py index 0f4b3c4bf..b2d2cb4ae 100644 --- a/tests/unit/helpers/utils.py +++ b/tests/unit/helpers/utils.py @@ -343,6 +343,70 @@ def __init__(self, spec): manager = BuildManager(type_map) return manager +############################################ +# Qux: A test class with variable data shapes +############################################ +class QuxData(Data): + pass + + +class QuxBucket(Container): + "PseudoFile" + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "qux_data", "type": QuxData, "doc": "Data with user defined shape."}, + ) + def __init__(self, **kwargs): + name, qux_data = getargs("name", "qux_data", kwargs) + super().__init__(name=name) + self.__qux_data = qux_data + self.__qux_data.parent = self + + @property + def qux_data(self): + return self.__qux_data + + +def get_qux_buildmanager(shape): + qux_data_spec = DatasetSpec( + doc="A test dataset of references specification with a data type", + name="qux_data", + data_type_def="QuxData", + shape=shape, + dtype='int' + ) + + qux_bucket_spec = GroupSpec( + doc="A test group specification for a data type containing data type", + data_type_def="QuxBucket", + datasets=[ + DatasetSpec(doc="doc", data_type_inc="QuxData"), + ], + ) + + spec_catalog = SpecCatalog() + spec_catalog.register_spec(qux_data_spec, "test.yaml") + spec_catalog.register_spec(qux_bucket_spec, "test.yaml") + + namespace = SpecNamespace( + "a test namespace", + CORE_NAMESPACE, + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) + + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(CORE_NAMESPACE, "QuxData", QuxData) + type_map.register_container_type(CORE_NAMESPACE, 
"QuxBucket", QuxBucket) + + manager = BuildManager(type_map) + return manager + + ############################################ # Baz example data containers and specs diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 5a4fd5a32..4dece7398 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -10,9 +10,12 @@ import zipfile import h5py -import numpy as np from h5py import SoftLink, HardLink, ExternalLink, File from h5py import filters as h5py_filters + +import numpy as np +import numpy.testing as npt + from hdmf.backends.hdf5 import H5DataIO from hdmf.backends.hdf5.h5tools import HDF5IO, SPEC_LOC_ATTR, H5PY_3 from hdmf.backends.io import HDMFIO @@ -28,12 +31,13 @@ from hdmf.testing import TestCase, remove_test_file from hdmf.common.resources import HERD from hdmf.term_set import TermSet, TermSetWrapper - +from hdmf.utils import get_data_shape from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager, CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper, - CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace) + CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace, + QuxData, QuxBucket, get_qux_buildmanager) try: import zarr @@ -739,12 +743,12 @@ def test_copy_h5py_dataset_h5dataio_input(self): self.f['test_copy'][:].tolist()) def test_list_fill_empty(self): - dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}}) + dset = self.io.__list_fill__(self.f, 'empty_dataset', [], None, True, options={'dtype': int, 'io_settings': {}}) self.assertTupleEqual(dset.shape, (0,)) def test_list_fill_empty_no_dtype(self): with self.assertRaisesRegex(Exception, r"cannot add \S+ to [/\S]+ - could not determine type"): - self.io.__list_fill__(self.f, 'empty_dataset', []) + self.io.__list_fill__(self.f, 'empty_dataset', [], None, True) def test_read_str(self): a = ['a', 'bb', 'ccc', 'dddd', 'e'] @@ -3725,3 +3729,57 @@ def test_set_data_io(self): self.data.set_data_io(H5DataIO, dict(chunks=True)) assert isinstance(self.data.data, H5DataIO) assert self.data.data.io_settings["chunks"] + + +class TestExpand(TestCase): + def setUp(self): + self.manager = get_foo_buildmanager() + self.path = get_temp_filepath() + + def test_expand_false(self): + # Setup all the data we need + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(foofile, expandable=False) + + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, + read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), + (5,)) + + def test_multi_shape_no_labels(self): + qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) + quxbucket = QuxBucket('bucket1', qux) + + manager = get_qux_buildmanager([[None, None],[None, 3]]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(quxbucket, expandable=True) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_quxbucket = io.read() + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + + def test_expand_set_shape(self): + qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) + quxbucket = 
QuxBucket('bucket1', qux) + + manager = get_qux_buildmanager([None, 3]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(quxbucket, expandable=True) + + with HDF5IO(self.path, manager=manager, mode='r+') as io: + read_quxbucket = io.read() + read_quxbucket.qux_data.append([7,8,9]) + + expected = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) From 06586c48198d7919ba5296a036d0a2bffba68a88 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 27 Aug 2024 17:33:30 -0700 Subject: [PATCH 02/18] fix and existing tests pass --- src/hdmf/build/objectmapper.py | 30 +++++++++++++++++-- .../mapper_tests/test_build_datetime.py | 3 +- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 6dbfdb1d4..fe22ea1db 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -14,7 +14,7 @@ IncorrectDatasetShapeBuildWarning) from ..container import AbstractContainer, Data, DataRegion from ..term_set import TermSetWrapper -from ..data_utils import DataIO, AbstractDataChunkIterator +from ..data_utils import DataIO, AbstractDataChunkIterator, InvalidDataIOError from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec @@ -1122,7 +1122,33 @@ def __add_datasets(self, builder, datasets, container, build_manager, source, ex raise BuildError(builder, msg) from ex self.logger.debug(" Adding untyped dataset for spec name %s and adding attributes" % repr(spec.name)) - sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype) + try: + expand = True + dimension_labels, matched_shape = self.__get_spec_info(data, + spec.shape, + spec.dims, + dtype) + except InvalidDataIOError: + # This try/except is for tests overwriting data in H5DataIO, i.e., raise error + # since this is not allowed. 
+ # ---> test_io_hdf5_h5tools.py::HDF5IOEmptyDataset::test_overwrite_dataset + expand = False + + if expand: + sub_builder = DatasetBuilder(spec.name, + data, + parent=builder, + source=source, + dtype=dtype, + spec_shapes=matched_shape, + dimension_labels=dimension_labels, + ) + else: + sub_builder = DatasetBuilder(spec.name, + data, + parent=builder, + source=source, + dtype=dtype) builder.set_dataset(sub_builder) self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export) else: diff --git a/tests/unit/build_tests/mapper_tests/test_build_datetime.py b/tests/unit/build_tests/mapper_tests/test_build_datetime.py index 9e2b5e84a..5d4ab5c39 100644 --- a/tests/unit/build_tests/mapper_tests/test_build_datetime.py +++ b/tests/unit/build_tests/mapper_tests/test_build_datetime.py @@ -67,8 +67,9 @@ def test_datetime_array(self): bar_inst = Bar(name='my_bar', data=[datetime(2023, 7, 9), datetime(2023, 7, 10)]) builder = type_map.build(bar_inst) ret = builder.get('data') + # breakpoint() assert ret.data == [b'2023-07-09T00:00:00', b'2023-07-10T00:00:00'] - assert ret.dtype == 'ascii' + # assert ret.dtype == 'ascii' def test_date_array(self): bar_spec = GroupSpec( From e488cf3dcef8dfaa07dcb1a3efb48e1bbd663add Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 27 Aug 2024 17:55:00 -0700 Subject: [PATCH 03/18] test --- tests/unit/test_io_hdf5_h5tools.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 4dece7398..56f0ea21b 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -791,6 +791,26 @@ def test_roundtrip_basic(self): self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + def test_roundtrip_basic_append(self): + # Setup all the data we need + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=self.manager, mode='a') as io: + read_foofile = io.read() + data = read_foofile.buckets['bucket1'].foos['foo1'].my_data + shape = list(data.shape) + shape[0] += 1 + data.resize(shape) + data[-1] = 6 + self.assertListEqual(read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist(), + [1, 2, 3, 4, 5, 6]) + + def test_roundtrip_empty_dataset(self): foo1 = Foo('foo1', [], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) From e5854003eee65fd6e94359f6b285dcad9fe51679 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 3 Sep 2024 16:08:42 -0700 Subject: [PATCH 04/18] Update test_build_datetime.py --- tests/unit/build_tests/mapper_tests/test_build_datetime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/build_tests/mapper_tests/test_build_datetime.py b/tests/unit/build_tests/mapper_tests/test_build_datetime.py index 5d4ab5c39..9e2b5e84a 100644 --- a/tests/unit/build_tests/mapper_tests/test_build_datetime.py +++ b/tests/unit/build_tests/mapper_tests/test_build_datetime.py @@ -67,9 +67,8 @@ def test_datetime_array(self): bar_inst = Bar(name='my_bar', data=[datetime(2023, 7, 9), datetime(2023, 7, 10)]) builder = type_map.build(bar_inst) ret = builder.get('data') - # breakpoint() assert ret.data == [b'2023-07-09T00:00:00', b'2023-07-10T00:00:00'] - # assert ret.dtype == 'ascii' + assert ret.dtype == 
'ascii' def test_date_array(self): bar_spec = GroupSpec( From ce47e7280284b586afab5ff03c01a380c8b16723 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 3 Sep 2024 16:24:51 -0700 Subject: [PATCH 05/18] cleanup --- src/hdmf/build/objectmapper.py | 40 +++++++++++----------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index c844ead1e..0cea8840d 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -1134,33 +1134,19 @@ def __add_datasets(self, builder, datasets, container, build_manager, source, ex raise BuildError(builder, msg) from ex self.logger.debug(" Adding untyped dataset for spec name %s and adding attributes" % repr(spec.name)) - try: - expand = True - dimension_labels, matched_shape = self.__get_spec_info(data, - spec.shape, - spec.dims, - dtype) - except InvalidDataIOError: - # This try/except is for tests overwriting data in H5DataIO, i.e., raise error - # since this is not allowed. - # ---> test_io_hdf5_h5tools.py::HDF5IOEmptyDataset::test_overwrite_dataset - expand = False - - if expand: - sub_builder = DatasetBuilder(spec.name, - data, - parent=builder, - source=source, - dtype=dtype, - spec_shapes=matched_shape, - dimension_labels=dimension_labels, - ) - else: - sub_builder = DatasetBuilder(spec.name, - data, - parent=builder, - source=source, - dtype=dtype) + + dimension_labels, matched_shape = self.__get_spec_info(data, + spec.shape, + spec.dims, + dtype) + + sub_builder = DatasetBuilder(spec.name, + data, + parent=builder, + source=source, + dtype=dtype, + spec_shapes=matched_shape, + dimension_labels=dimension_labels) builder.set_dataset(sub_builder) self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export) else: From 15689deb9feb70fc4e096f5ec505d1b40e66f686 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Tue, 3 Sep 2024 16:27:31 -0700 Subject: [PATCH 06/18] cleanup --- src/hdmf/build/objectmapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 0cea8840d..fc25efc16 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -17,7 +17,7 @@ from ..container import AbstractContainer, Data, DataRegion from ..term_set import TermSetWrapper -from ..data_utils import DataIO, AbstractDataChunkIterator, InvalidDataIOError +from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec from ..spec.spec import BaseStorageSpec From 048925770ce34715dc5ddaf37199348ed2c44802 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Feb 2025 22:58:44 +0000 Subject: [PATCH 07/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit/test_io_hdf5_h5tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index b56dc5026..debec26d8 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -3984,4 +3984,3 @@ def test_expand_set_shape(self): [7, 8, 9]]) npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) - From 7f3b40ba2c94f5d5f939c37001d4b587b389c484 Mon Sep 17 00:00:00 2001 From: Ryan Ly 
<310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:29:39 -0800 Subject: [PATCH 08/18] Update tests/unit/test_io_hdf5_h5tools.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 457c74fc8..76abe07a7 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4116,6 +4116,9 @@ def setUp(self): self.manager = get_foo_buildmanager() self.path = get_temp_filepath() + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) def test_expand_false(self): # Setup all the data we need foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) From 62da59709dcbc6346ff1f160f7875c39b2d89adf Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:32:01 -0800 Subject: [PATCH 09/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/hdmf/backends/hdf5/h5tools.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index d8dcd6aa8..9f3f39b2c 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -789,10 +789,6 @@ def close_linked_files(self): 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) - link_data, exhaust_dci, export_source = getargs('link_data', - 'exhaust_dci', - 'export_source', - kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) for name, gbldr in f_builder.groups.items(): From 1641fc441113a9011d355380d85bd9b3b7e78769 Mon Sep 17 00:00:00 2001 From: rly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 00:35:52 -0800 Subject: [PATCH 10/18] Clean up write_builder --- src/hdmf/backends/hdf5/h5tools.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index d8dcd6aa8..4bb6abfb2 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -789,17 +789,13 @@ def close_linked_files(self): 'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) - link_data, exhaust_dci, export_source = getargs('link_data', - 'exhaust_dci', - 'export_source', - kwargs) self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s" % (f_builder.name, self.source, kwargs)) - for name, gbldr in f_builder.groups.items(): + for gbldr in f_builder.groups.values(): self.write_group(self.__file, gbldr, **kwargs) - for name, dbldr in f_builder.datasets.items(): + for dbldr in f_builder.datasets.values(): self.write_dataset(self.__file, dbldr, **kwargs) - for name, lbldr in f_builder.links.items(): + for lbldr in f_builder.links.values(): self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source")) self.set_attributes(self.__file, f_builder.attributes) self.__add_refs() From 9ca356cf0ae276b9a301f7a9ae2e278210cebe65 Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:37:24 -0800 Subject: [PATCH 11/18] Apply suggestion from @Copilot Co-authored-by: Copilot 
<175728472+Copilot@users.noreply.github.com> --- src/hdmf/build/objectmapper.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 46a2c4ea6..b37d0a944 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -895,6 +895,31 @@ def __check_dset_spec(self, orig, ext): return dtype, shape, dims, spec def __get_matched_dimension(self, data_shape, spec_shape, spec_dtype=None): + """ + Determine which allowed shapes match the given data shape. + + Parameters + ---------- + data_shape : tuple or list of int + The shape of the data being validated. + spec_shape : sequence + A sequence of allowed shapes for the dataset. When the first element is a + list (or other sequence), this is interpreted as a list of alternative + shapes, where each element is itself a shape specification. Each shape + specification is a sequence of dimension lengths or ``None``, where + ``None`` means any length is allowed for that dimension. + spec_dtype : any, optional + The expected data type for the dataset. This parameter is currently not + used in the matching logic but is accepted for interface consistency. + + Returns + ------- + list of int or None + A list of indices into ``spec_shape`` whose shape specifications match + ``data_shape``. Returns an empty list if no shapes match. Returns + ``None`` if ``spec_shape`` is not a list of alternative shapes (i.e., + when ``spec_shape[0]`` is not a list-like shape specification). + """ # if shape is a list of allowed shapes, find the index of the shape that matches the data if isinstance(spec_shape[0], list): match_shape_inds = list() From ad91c45c107e2e4fdd14fac90ebc6091528f46e1 Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:37:52 -0800 Subject: [PATCH 12/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/hdmf/build/objectmapper.py | 43 +++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index b37d0a944..16704c2a7 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -938,7 +938,48 @@ def __get_matched_dimension(self, data_shape, spec_shape, spec_dtype=None): return match_shape_inds def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None): - """This will return the dimension labels and shape by matching the data shape to a permissible spec shape.""" + """ + Determine the dimension labels and shape for data based on the allowed shapes in the spec. + + This method compares the shape of ``data`` against the allowed shapes defined in + ``spec_shape`` and, if possible, selects the best-matching shape and corresponding + dimension labels from ``spec_dims``. + + Parameters + ---------- + data + The data object whose shape is to be matched against the specification. This may be + any array-like object supported by :func:`get_data_shape`. + spec_shape + The allowed shape definition(s) from the spec. This may be: + + * ``None`` – no shape is defined. + * A list/tuple of dimension lengths, where each element is an int or ``None``. + * A list of such lists/tuples, representing multiple allowed shapes. + spec_dims + The dimension labels defined in the spec. This may be: + + * ``None`` – no dimension labels are defined. + * A list/tuple of labels corresponding to a single shape in ``spec_shape``. 
+ * A list of lists/tuples of labels, each corresponding to an entry in + ``spec_shape`` when multiple shapes are allowed. + spec_dtype + The dtype or list of dtypes defined in the spec. When ``spec_dtype`` is a list, + the data is treated as 1D with length equal to ``len(data)`` for the purpose of + shape matching. May be ``None`` if no dtype constraint is specified. + + Returns + ------- + tuple + A 2-tuple ``(dims, shape)`` where: + + * ``dims`` is either ``None`` or a tuple of selected dimension labels. + * ``shape`` is either ``None`` or a tuple of dimension lengths corresponding to + the best-matching allowed shape. + + If no matching shape is found, or if both ``spec_shape`` and ``spec_dims`` are + ``None``, the method returns ``(None, None)``. + """ if spec_shape is None and spec_dims is None: return None, None else: From 02a52421207f32c5b33c69420262bc7b193bceaf Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:39:22 -0800 Subject: [PATCH 13/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 76abe07a7..3cfc8e445 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4119,6 +4119,7 @@ def setUp(self): def tearDown(self): if os.path.exists(self.path): os.remove(self.path) + def test_expand_false(self): # Setup all the data we need foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) From 0fbf0ecec17840e6860cd93abea5887bd51b3dad Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:39:34 -0800 Subject: [PATCH 14/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 3cfc8e445..3d3afc4b4 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4140,7 +4140,7 @@ def test_multi_shape_no_labels(self): qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) quxbucket = QuxBucket('bucket1', qux) - manager = get_qux_buildmanager([[None, None],[None, 3]]) + manager = get_qux_buildmanager([[None, None], [None, 3]]) with HDF5IO(self.path, manager=manager, mode='w') as io: io.write(quxbucket, expandable=True) From 70f0791d82763e452b9eaa572bb8bf1ffb80a35a Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:39:45 -0800 Subject: [PATCH 15/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 3d3afc4b4..42a22cd80 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4160,7 +4160,7 @@ def test_expand_set_shape(self): with HDF5IO(self.path, manager=manager, mode='r+') as io: read_quxbucket = io.read() - read_quxbucket.qux_data.append([7,8,9]) + read_quxbucket.qux_data.append([7, 8, 9]) expected = np.array([[1, 2, 3], [4, 5, 6], From dd8b10195a61e7ff445e5879c306a40167825c58 Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 
Feb 2026 01:39:55 -0800 Subject: [PATCH 16/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 42a22cd80..0835e0001 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4147,7 +4147,7 @@ def test_multi_shape_no_labels(self): with HDF5IO(self.path, manager=manager, mode='r') as io: read_quxbucket = io.read() - self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None, 3)) def test_expand_set_shape(self): qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) From d350d2a69249254c4de7b9f523910c6463475fbd Mon Sep 17 00:00:00 2001 From: Ryan Ly <310197+rly@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:40:21 -0800 Subject: [PATCH 17/18] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/unit/test_io_hdf5_h5tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 0835e0001..e737b2feb 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -4166,4 +4166,4 @@ def test_expand_set_shape(self): [4, 5, 6], [7, 8, 9]]) npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) - self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None, 3)) From 27fe4c4b636a079d277019c045cb03ced02d1f2d Mon Sep 17 00:00:00 2001 From: rly <310197+rly@users.noreply.github.com> Date: Mon, 9 Feb 2026 19:13:01 -0800 Subject: [PATCH 18/18] Check shape before maxshape in get_data_shape --- src/hdmf/utils.py | 5 ++--- tests/unit/utils_test/test_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index c7fe2b476..578da41f2 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -843,11 +843,10 @@ def __get_shape_helper(local_data): if isinstance(data, Data): data = data.data - # NOTE: data.maxshape will fail on empty h5py.Dataset without shape or maxshape. this will be fixed in h5py 3.0 - if hasattr(data, 'maxshape'): - return data.maxshape if hasattr(data, 'shape') and data.shape is not None: return data.shape + if hasattr(data, 'maxshape'): + return data.maxshape if isinstance(data, dict): return None if hasattr(data, '__len__') and not isinstance(data, (str, bytes)): diff --git a/tests/unit/utils_test/test_utils.py b/tests/unit/utils_test/test_utils.py index 3b8fb1013..98115d3d3 100644 --- a/tests/unit/utils_test/test_utils.py +++ b/tests/unit/utils_test/test_utils.py @@ -22,10 +22,10 @@ def test_h5dataset(self): res = get_data_shape(dset) self.assertTupleEqual(res, (3, 2)) - # test that maxshape takes priority + # test that shape takes priority over maxshape for objects that have both dset = f.create_dataset('shape_maxshape', shape=(3, 2), maxshape=(None, 100)) res = get_data_shape(dset) - self.assertTupleEqual(res, (None, 100)) + self.assertTupleEqual(res, (3, 2)) os.remove(path)
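
Taken together, patches 01-18 make HDF5 datasets expandable by default by deriving `maxshape` from the shape(s) allowed in the spec and passing it through `DatasetBuilder.spec_shapes` into `__list_fill__`. A minimal sketch of the resulting round-trip behavior, modeled on `test_roundtrip_basic_append` above; the `Foo` helpers come from `tests/unit/helpers/utils.py` in this repo, the file path `demo.h5` is illustrative, and the resize succeeds only when the matched spec shape leaves the first dimension unconstrained (`None`):

from hdmf.backends.hdf5.h5tools import HDF5IO
from tests.unit.helpers.utils import Foo, FooBucket, FooFile, get_foo_buildmanager

manager = get_foo_buildmanager()
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foofile = FooFile(buckets=[FooBucket('bucket1', [foo1])])

# expandable defaults to True; when the data matches a shape in the spec,
# that shape becomes the dataset's maxshape on write
with HDF5IO('demo.h5', manager=manager, mode='w') as io:
    io.write(foofile)

# Reopen in append mode and grow the dataset in place.
with HDF5IO('demo.h5', manager=manager, mode='a') as io:
    read_foofile = io.read()
    data = read_foofile.buckets['bucket1'].foos['foo1'].my_data
    shape = list(data.shape)
    shape[0] += 1
    data.resize(shape)  # allowed because the first maxshape dimension is None
    data[-1] = 6

Passing `io.write(foofile, expandable=False)` instead preserves the previous behavior of a fixed on-disk shape, as exercised by `test_expand_false`.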
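
When the spec permits several alternative shapes, `__get_spec_info` matches the data against each one and keeps the most specific match, i.e. the alternative with the fewest `None` entries, which `__list_fill__` then applies as `maxshape`. A sketch mirroring `test_multi_shape_no_labels`; the `Qux` helpers come from `tests/unit/helpers/utils.py` and the file path `qux.h5` is illustrative:

from hdmf.backends.hdf5.h5tools import HDF5IO
from tests.unit.helpers.utils import QuxData, QuxBucket, get_qux_buildmanager

# The spec allows either [None, None] or [None, 3]; a 2x3 array matches both,
# and the more specific alternative (fewer Nones) wins.
manager = get_qux_buildmanager([[None, None], [None, 3]])
quxbucket = QuxBucket('bucket1', QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]))

with HDF5IO('qux.h5', manager=manager, mode='w') as io:
    io.write(quxbucket, expandable=True)

with HDF5IO('qux.h5', manager=manager, mode='r') as io:
    assert io.read().qux_data.data.maxshape == (None, 3)

If no alternative matches the data, `__get_spec_info` emits an IncorrectDatasetShapeBuildWarning and returns no shape, so the dataset is written without a maxshape.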
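
Patch 18 also flips the precedence in `get_data_shape`: an object's concrete `.shape` is now checked before its `.maxshape`, so shape checks during build see a dataset's current extent rather than its growth limit. A sketch per the updated `test_h5dataset`; the file path `shapes.h5` is illustrative:

import h5py
from hdmf.utils import get_data_shape

with h5py.File('shapes.h5', 'w') as f:
    dset = f.create_dataset('data', shape=(3, 2), maxshape=(None, 100))
    # .shape wins over .maxshape; .maxshape remains a fallback for objects
    # that expose only a maxshape
    assert get_data_shape(dset) == (3, 2)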