26 commits
ddd433a - VectorData Refactor Expandable (#1158) - mavaylon1, Aug 27, 2024
06586c4 - fix and existing tests pass - mavaylon1, Aug 28, 2024
e488cf3 - test - mavaylon1, Aug 28, 2024
2921c45 - Merge branch 'dev' into staging_expand - mavaylon1, Aug 30, 2024
b253732 - Merge branch 'dev' into staging_expand - mavaylon1, Sep 3, 2024
e585400 - Update test_build_datetime.py - mavaylon1, Sep 3, 2024
ce47e72 - cleanup - mavaylon1, Sep 3, 2024
15689de - cleanup - mavaylon1, Sep 3, 2024
d593a49 - Merge branch 'dev' into staging_expand - mavaylon1, Feb 10, 2025
0489257 - [pre-commit.ci] auto fixes from pre-commit.com hooks - pre-commit-ci[bot], Feb 10, 2025
9b76a1e - Merge branch 'dev' into staging_expand - mavaylon1, Feb 20, 2025
08bf60b - Merge branch 'dev' into staging_expand - mavaylon1, Jul 11, 2025
339a0e5 - Merge branch 'dev' into staging_expand - rly, Oct 20, 2025
b9a3615 - Merge branch 'dev' into staging_expand - rly, Jan 31, 2026
7f3b40b - Update tests/unit/test_io_hdf5_h5tools.py - rly, Feb 6, 2026
62da597 - Apply suggestion from @Copilot - rly, Feb 6, 2026
1641fc4 - Clean up write_builder - rly, Feb 6, 2026
a98e112 - Merge branch 'staging_expand' of github.com:hdmf-dev/hdmf into stagin… - rly, Feb 6, 2026
9ca356c - Apply suggestion from @Copilot - rly, Feb 6, 2026
ad91c45 - Apply suggestion from @Copilot - rly, Feb 6, 2026
02a5242 - Apply suggestion from @Copilot - rly, Feb 6, 2026
0fbf0ec - Apply suggestion from @Copilot - rly, Feb 6, 2026
70f0791 - Apply suggestion from @Copilot - rly, Feb 6, 2026
dd8b101 - Apply suggestion from @Copilot - rly, Feb 6, 2026
d350d2a - Apply suggestion from @Copilot - rly, Feb 6, 2026
f6e9d46 - Merge branch 'dev' into staging_expand - rly, Feb 9, 2026
34 changes: 24 additions & 10 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -309,7 +309,9 @@ def __get_namespaces(cls, file_obj):
'default': True},
{'name': 'herd', 'type': 'hdmf.common.resources.HERD',
'doc': 'A HERD object to populate with references.',
'default': None})
'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
def write(self, **kwargs):
"""Write the container to an HDF5 file."""
if self.__mode == 'r':
@@ -750,17 +752,18 @@ def close_linked_files(self):
'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None})
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
def write_builder(self, **kwargs):
f_builder = popargs('builder', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
% (f_builder.name, self.source, kwargs))
for name, gbldr in f_builder.groups.items():
for gbldr in f_builder.groups.values():
self.write_group(self.__file, gbldr, **kwargs)
for name, dbldr in f_builder.datasets.items():
for dbldr in f_builder.datasets.values():
self.write_dataset(self.__file, dbldr, **kwargs)
for name, lbldr in f_builder.links.items():
for lbldr in f_builder.links.values():
self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source"))
self.set_attributes(self.__file, f_builder.attributes)
self.__add_refs()
@@ -927,6 +930,8 @@ def _filler():
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
returns='the Group that was created', rtype=Group)
def write_group(self, **kwargs):
parent, builder = popargs('parent', 'builder', kwargs)
@@ -1027,21 +1032,24 @@ def write_link(self, **kwargs):
'default': True},
{'name': 'export_source', 'type': str,
'doc': 'The source of the builders when exporting', 'default': None},
{'name': 'expandable', 'type': bool, 'default': True,
'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
returns='the Dataset that was created', rtype=Dataset)
def write_dataset(self, **kwargs): # noqa: C901
""" Write a dataset to HDF5

The function uses other dataset-dependent write functions, e.g,
``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
"""
parent, builder = popargs('parent', 'builder', kwargs)
parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
if self.get_written(builder):
self.logger.debug(" DatasetBuilder '%s' is already written" % builder.name)
return None
name = builder.name
data = builder.data
matched_spec_shape = builder.spec_shapes
dataio = None
options = dict() # dict with additional
if isinstance(data, H5DataIO):
@@ -1157,8 +1165,9 @@ def _filler():
elif len(np.shape(data)) == 0:
dset = self.__scalar_fill__(parent, name, data, options)
else:
dset = self.__list_fill__(parent, name, data, options)
dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
# Write a dataset containing references, i.e., object reference.

# NOTE: we can ignore options['io_settings'] for scalar data
elif self.__is_ref(options['dtype']):
_dtype = self.__dtypes.get(options['dtype'])
@@ -1215,7 +1224,7 @@ def _filler():
self.__dci_queue.append(dataset=dset, data=data)
# Write a regular in memory array (e.g., numpy array, list etc.)
elif hasattr(data, '__len__'):
dset = self.__list_fill__(parent, name, data, options)
dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
# Write a regular scalar dataset
else:
dset = self.__scalar_fill__(parent, name, data, options)
@@ -1343,7 +1352,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
return dset

@classmethod
def __list_fill__(cls, parent, name, data, options=None):
def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
# define the io settings and data type if necessary
io_settings = {}
dtype = None
@@ -1365,6 +1374,11 @@ def __list_fill__(cls, parent, name, data, options=None):
data_shape = (len(data),)
else:
data_shape = get_data_shape(data)
if expandable:
# Don't override existing settings
if 'maxshape' not in io_settings:
if matched_spec_shape is not None:
io_settings['maxshape'] = matched_spec_shape

# Create the dataset
try:
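Net effect in h5tools.py: HDF5IO.write, write_builder, write_group, and write_dataset all gain an expandable flag (default True), and __list_fill__ uses the builder's spec_shapes as the HDF5 maxshape when no maxshape was already supplied. A minimal h5py-only sketch of what an unlimited maxshape buys at the file level (file and dataset names here are illustrative, not from the PR):

import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    # maxshape=(None,) marks the first dimension as unlimited; h5py enables
    # chunking automatically so the dataset can be resized after creation.
    dset = f.create_dataset("data", data=np.arange(5), maxshape=(None,))
    dset.resize((8,))
    dset[5:] = [10, 11, 12]
    # Without maxshape, the resize call above would fail, because
    # contiguous (non-chunked) datasets have a fixed shape.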
16 changes: 12 additions & 4 deletions src/hdmf/build/builders.py
@@ -328,6 +328,9 @@ class DatasetBuilder(BaseBuilder):
'doc': 'The datatype of this dataset.', 'default': None},
{'name': 'attributes', 'type': dict,
'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()},
{'name': 'spec_shapes', 'type': tuple,
'doc': ('The shape(s) defined in the spec.'),
'default': None},
{'name': 'dimension_labels', 'type': tuple,
'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is '
'supplied only on build.'),
@@ -339,22 +342,27 @@ class DatasetBuilder(BaseBuilder):
{'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None})
def __init__(self, **kwargs):
""" Create a Builder object for a dataset """
name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs(
'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source',
kwargs
)
name, data, dtype, attributes, spec_shapes, dimension_labels, maxshape, chunks, parent, source = getargs(
'name', 'data', 'dtype', 'attributes', 'spec_shapes', 'dimension_labels', 'maxshape', 'chunks',
'parent', 'source', kwargs)
super().__init__(name, attributes, parent, source)
self['data'] = data
self['attributes'] = _copy.copy(attributes)
self.__dimension_labels = dimension_labels
self.__chunks = chunks
self.__spec_shapes = spec_shapes
self.__maxshape = maxshape
if isinstance(data, BaseBuilder):
if dtype is None:
dtype = self.OBJECT_REF_TYPE
self.__dtype = dtype
self.__name = name

@property
def spec_shapes(self):
"""The shapes defined in the spec."""
return self.__spec_shapes

@property
def data(self):
"""The data stored in the dataset represented by this builder."""
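On the builder side, DatasetBuilder now carries the spec-defined shape(s) via the new spec_shapes argument and a read-only property, which the HDF5 backend consults at write time. A short sketch of the new field (the shape value shown is illustrative; a None entry marks an unlimited dimension):

from hdmf.build import DatasetBuilder

# spec_shapes records the shape(s) from the spec; the HDF5 backend can map
# a None entry to an unlimited maxshape dimension when expandable is True.
builder = DatasetBuilder(name="data", data=[1, 2, 3], spec_shapes=(None,))
print(builder.spec_shapes)  # (None,)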