26 commits
ddd433a - VectorData Refactor Expandable (#1158) - mavaylon1, Aug 27, 2024
06586c4 - fix and existing tests pass - mavaylon1, Aug 28, 2024
e488cf3 - test - mavaylon1, Aug 28, 2024
2921c45 - Merge branch 'dev' into staging_expand - mavaylon1, Aug 30, 2024
b253732 - Merge branch 'dev' into staging_expand - mavaylon1, Sep 3, 2024
e585400 - Update test_build_datetime.py - mavaylon1, Sep 3, 2024
ce47e72 - cleanup - mavaylon1, Sep 3, 2024
15689de - cleanup - mavaylon1, Sep 3, 2024
d593a49 - Merge branch 'dev' into staging_expand - mavaylon1, Feb 10, 2025
0489257 - [pre-commit.ci] auto fixes from pre-commit.com hooks - pre-commit-ci[bot], Feb 10, 2025
9b76a1e - Merge branch 'dev' into staging_expand - mavaylon1, Feb 20, 2025
08bf60b - Merge branch 'dev' into staging_expand - mavaylon1, Jul 11, 2025
339a0e5 - Merge branch 'dev' into staging_expand - rly, Oct 20, 2025
b9a3615 - Merge branch 'dev' into staging_expand - rly, Jan 31, 2026
7f3b40b - Update tests/unit/test_io_hdf5_h5tools.py - rly, Feb 6, 2026
62da597 - Apply suggestion from @Copilot - rly, Feb 6, 2026
1641fc4 - Clean up write_builder - rly, Feb 6, 2026
a98e112 - Merge branch 'staging_expand' of github.com:hdmf-dev/hdmf into stagin… - rly, Feb 6, 2026
9ca356c - Apply suggestion from @Copilot - rly, Feb 6, 2026
ad91c45 - Apply suggestion from @Copilot - rly, Feb 6, 2026
02a5242 - Apply suggestion from @Copilot - rly, Feb 6, 2026
0fbf0ec - Apply suggestion from @Copilot - rly, Feb 6, 2026
70f0791 - Apply suggestion from @Copilot - rly, Feb 6, 2026
dd8b101 - Apply suggestion from @Copilot - rly, Feb 6, 2026
d350d2a - Apply suggestion from @Copilot - rly, Feb 6, 2026
f6e9d46 - Merge branch 'dev' into staging_expand - rly, Feb 9, 2026
34 changes: 24 additions & 10 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -309,7 +309,9 @@ def __get_namespaces(cls, file_obj):
'default': True},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
'default': None})
'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
def write(self, **kwargs):
"""Write the container to an HDF5 file."""
if self.__mode == 'r':
@@ -750,17 +752,18 @@ def close_linked_files(self):
'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None})
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
def write_builder(self, **kwargs):
f_builder = popargs('builder', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
% (f_builder.name, self.source, kwargs))
for name, gbldr in f_builder.groups.items():
for gbldr in f_builder.groups.values():
self.write_group(self.__file, gbldr, **kwargs)
for name, dbldr in f_builder.datasets.items():
for dbldr in f_builder.datasets.values():
self.write_dataset(self.__file, dbldr, **kwargs)
for name, lbldr in f_builder.links.items():
for lbldr in f_builder.links.values():
self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source"))
self.set_attributes(self.__file, f_builder.attributes)
self.__add_refs()
@@ -927,6 +930,8 @@ def _filler():
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
returns='the Group that was created', rtype=Group)
def write_group(self, **kwargs):
parent, builder = popargs('parent', 'builder', kwargs)
@@ -1027,21 +1032,24 @@ def write_link(self, **kwargs):
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
returns='the Dataset that was created', rtype=Dataset)
def write_dataset(self, **kwargs): # noqa: C901
""" Write a dataset to HDF5

The function uses other dataset-dependent write functions, e.g,
``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
"""
parent, builder = popargs('parent', 'builder', kwargs)
parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if self.get_written(builder):
self.logger.debug(" DatasetBuilder '%s' is already written" % builder.name)
return None
name = builder.name
data = builder.data
matched_spec_shape = builder.spec_shapes
dataio = None
options = dict() # dict with additional
if isinstance(data, H5DataIO):
@@ -1157,8 +1165,9 @@ def _filler():
elif len(np.shape(data)) == 0:
dset = self.__scalar_fill__(parent, name, data, options)
else:
dset = self.__list_fill__(parent, name, data, options)
dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
# Write a dataset containing references, i.e., object reference.

# NOTE: we can ignore options['io_settings'] for scalar data
elif self.__is_ref(options['dtype']):
_dtype = self.__dtypes.get(options['dtype'])
@@ -1215,7 +1224,7 @@ def _filler():
self.__dci_queue.append(dataset=dset, data=data)
# Write a regular in memory array (e.g., numpy array, list etc.)
elif hasattr(data, '__len__'):
dset = self.__list_fill__(parent, name, data, options)
dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
# Write a regular scalar dataset
else:
dset = self.__scalar_fill__(parent, name, data, options)
@@ -1343,7 +1352,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
return dset

@classmethod
def __list_fill__(cls, parent, name, data, options=None):
def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
# define the io settings and data type if necessary
io_settings = {}
dtype = None
@@ -1365,6 +1374,11 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
if expandable:
# Don't override existing settings
if 'maxshape' not in io_settings:
if matched_spec_shape is not None:
io_settings['maxshape'] = matched_spec_shape

# Create the dataset
try:
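Net effect in h5tools.py: HDF5IO.write, write_builder, write_group, and write_dataset all gain an expandable flag (default True), and __list_fill__ uses the builder's spec_shapes as the HDF5 maxshape when no maxshape was already supplied. A minimal h5py-only sketch of what an unlimited maxshape buys at the file level (file and dataset names here are illustrative, not from the PR):

import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    # maxshape=(None,) marks the first dimension as unlimited; h5py enables
    # chunking automatically so the dataset can be resized after creation.
    dset = f.create_dataset("data", data=np.arange(5), maxshape=(None,))
    dset.resize((8,))
    dset[5:] = [10, 11, 12]
    # Without maxshape, the resize call above would fail, because
    # contiguous (non-chunked) datasets have a fixed shape.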
16 changes: 12 additions & 4 deletions src/hdmf/build/builders.py
@@ -328,6 +328,9 @@ class DatasetBuilder(BaseBuilder):
'doc': 'The datatype of this dataset.', 'default': None},
{'name': 'attributes', 'type': dict,
'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()},
{'name': 'spec_shapes', 'type': tuple,
'doc': ('The shape(s) defined in the spec.'),
'default': None},
{'name': 'dimension_labels', 'type': tuple,
'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is '
'supplied only on build.'),
@@ -339,22 +342,27 @@ class DatasetBuilder(BaseBuilder):
{'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None})
def __init__(self, **kwargs):
""" Create a Builder object for a dataset """
name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs(
'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source',
kwargs
)
name, data, dtype, attributes, spec_shapes, dimension_labels, maxshape, chunks, parent, source = getargs(
'name', 'data', 'dtype', 'attributes', 'spec_shapes', 'dimension_labels', 'maxshape', 'chunks',
'parent', 'source', kwargs)
super().__init__(name, attributes, parent, source)
self['data'] = data
self['attributes'] = _copy.copy(attributes)
self.__dimension_labels = dimension_labels
self.__chunks = chunks
self.__spec_shapes = spec_shapes
self.__maxshape = maxshape
if isinstance(data, BaseBuilder):
if dtype is None:
dtype = self.OBJECT_REF_TYPE
self.__dtype = dtype
self.__name = name

@property
def spec_shapes(self):
"""The shapes defined in the spec."""
return self.__spec_shapes

@property
def data(self):
"""The data stored in the dataset represented by this builder."""
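On the builder side, DatasetBuilder now carries the spec-defined shape(s) via the new spec_shapes argument and a read-only property, which the HDF5 backend consults at write time. A short sketch of the new field (the shape value shown is illustrative; a None entry marks an unlimited dimension):

from hdmf.build import DatasetBuilder

# spec_shapes records the shape(s) from the spec; the HDF5 backend can map
# a None entry to an unlimited maxshape dimension when expandable is True.
builder = DatasetBuilder(name="data", data=[1, 2, 3], spec_shapes=(None,))
print(builder.spec_shapes)  # (None,)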