From f2594620ffcf07399255c02ef80cb21edcc2ccd1 Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Tue, 23 Jan 2024 16:14:26 -0600 Subject: [PATCH 1/8] Get default size from type if undefined --- supyr_struct/blocks/union_block.py | 2 +- supyr_struct/buffer.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/supyr_struct/blocks/union_block.py b/supyr_struct/blocks/union_block.py index 47d1a41..eed3592 100644 --- a/supyr_struct/blocks/union_block.py +++ b/supyr_struct/blocks/union_block.py @@ -387,7 +387,7 @@ def flush(self): # If they are smaller, some of the most significant bytes # arent used, which in big endian are the first bytes. u_type.serializer(u_node, self, None, self, 0, - desc.get(SIZE) - u_desc.get(SIZE)) + desc.get(SIZE, 0) - u_desc.get(SIZE, u_type.size)) else: u_type.serializer(u_node, self, None, self) diff --git a/supyr_struct/buffer.py b/supyr_struct/buffer.py index d869025..6dd686c 100644 --- a/supyr_struct/buffer.py +++ b/supyr_struct/buffer.py @@ -287,6 +287,9 @@ class BytearrayBuffer(bytearray, Buffer): Uses os.SEEK_SET, os.SEEK_CUR, and os.SEEK_END when calling seek. 
''' __slots__ = ('_pos',) + def __init__(self, *args): + bytearray.__init__(self, *args) + Buffer.__init__(self, *args) def peek(self, count=None, offset=None): ''' From b53282119dd42a85ce408e9040cddaf7f6b52e73 Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Tue, 23 Jan 2024 16:15:02 -0600 Subject: [PATCH 2/8] bump version --- supyr_struct/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supyr_struct/__init__.py b/supyr_struct/__init__.py index 76aacfb..bf58840 100644 --- a/supyr_struct/__init__.py +++ b/supyr_struct/__init__.py @@ -87,8 +87,8 @@ # ############## __author__ = "Sigmmma" # YYYY.MM.DD -__date__ = "2020.10.30" -__version__ = (1, 5, 4) +__date__ = "2024.01.23" +__version__ = (1, 5, 5) __website__ = "https://github.com/Sigmmma/supyr_struct" From 475cd5605984cc1b67216d212d63aef1dc8e0709 Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Sat, 10 Feb 2024 13:47:07 -0600 Subject: [PATCH 3/8] New desc functions --- supyr_struct/util.py | 130 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 17 deletions(-) diff --git a/supyr_struct/util.py b/supyr_struct/util.py index 88aa1be..6a4016f 100644 --- a/supyr_struct/util.py +++ b/supyr_struct/util.py @@ -87,7 +87,7 @@ def str_to_identifier(string): of invalid non-alphanumeric characters with an underscore. Trailing underscores are removed. ''' - assert isinstance(string, str) + assert isinstance(string, str), "Expected str, but got %s" % type(string) new_string = re.sub(non_alphanum_set, '_', string) new_string = re.sub(digits_at_start, '', new_string) @@ -98,13 +98,19 @@ def str_to_identifier(string): return new_string -def desc_variant(desc, *replacements): +def desc_variant(desc, *replacements, verify=False, **kwargs): ''' Fringe: Used to generate a new descriptor using a set of replacements. + Replacements can either be a field descriptor, or a tuple containing + the name of the field to replace, and the replacement field descriptor. 
+ If verify is True, replacement fields will have their size checked to + ensure it matches the size of the replaced field. If they don't match or it + can't be determined if they do, a ValueError is thrown. desc_variant(some_descriptor, (str:name_of_old_field, FieldType:new_field_def), (str:name_of_another_old_field, FieldType:some_other_field_def), + FieldType:new_field_def_with_same_name_as_old, ) Ex: ```py @@ -112,33 +118,123 @@ def desc_variant(desc, *replacements): UInt32("one"), UInt32("two"), UInt32("three"), - ) + ) thing_variant = desc_variant(thing, - ("two", - Struct("new_two", UInt16("something"), Uint16("some_other")) - ), - ) + ("two", Struct("new_two", UInt16("something"), UInt16("some_other"))), + Struct("three", UInt16("aaaa"), UInt16("bbbb")), + ) ``` - This would make thing_variant a variant of thing where UInt32 "two" - is replaced by a Struct called "new_two". + This would make thing_variant a variant of thing where UInt32 "two" is + replaced by a Struct called "new_two", and "three" is similarly replaced. ''' desc, name_map = dict(desc), dict() + desc.update(kwargs) + + # NOTE: this function has been improved to make it much harder to + # accidentally replace the wrong field, or use a field with + # a mismatched size. If a name isn't provided, we're assumed + # to need to find something to replace with the same name as + # what we've been provided. 
Additionally, we can check that + # the size of the replacement is the same as what is replaced for i in range(desc['ENTRIES']): - name = desc[i].get('NAME', '_') - # padding uses _ as its name - if name == '_': - # Doing this is midly faster + sub_desc = desc[i] + name = sub_desc.get('NAME', None) + ftyp = sub_desc.get('TYPE') + + # padding uses _ as its name, so any other name is mapped directly + if name != "_": + name_map[str_to_identifier(name)] = i + elif not ftyp or ftyp.name != "Pad": + # don't let this silently cause bugs + raise ValueError("Expected padding, but got %s" % ftyp) + else: + # generate the name we expect the user to pass for the padding name_map['pad_%d' % i] = i - continue - name_map[str_to_identifier(name)] = i - for name, new_sub_desc in replacements: - desc[name_map[str_to_identifier(name)]] = new_sub_desc + for replacement in replacements: + name, new_sub_desc = None, None + + if isinstance(replacement, dict): + # we were provided just a desc + new_sub_desc = replacement + elif not(replacement and isinstance(replacement, (list, tuple))): + raise ValueError("Invalid replacement supplied: %s of type %s" % + (replacement, type(replacement)) + ) + else: + # we were given a list or tuple. figure out what was passed + for val in replacement: + if not name and isinstance(val, str): + name = val + elif not isinstance(val, dict): + raise ValueError("Unknown replacement value passed: %s" % val) + elif not new_sub_desc and val.get('TYPE'): + new_sub_desc = val + else: + raise ValueError("Unexpected replacement value passed: %s" % val) + + if not new_sub_desc: + raise ValueError("No replacement desc provided.") + + if name is None: + # we were provided a replacement desc without a target name. + # assume the name of the field we're replacing matches its name + name = new_sub_desc["NAME"] + + # figure out what index to put the replacement into, and + # (if requested) do some validation on the replacement. 
+ index = name_map[str_to_identifier(name)] + verify_args = (desc, new_sub_desc, desc[index]) + if verify and get_replacement_field_conflict(*verify_args): + raise ValueError( + "Incompatible replacement detected for field '%s':\n\t%s" % + (name, get_replacement_field_conflict(*verify_args)) + ) + + desc[index] = new_sub_desc return desc +def desc_variant_with_verify(desc, *replacements, **kwargs): + '''Version of desc_variant with size verification defaulted to True.''' + kwargs.setdefault("verify", True) + return desc_variant(desc, *replacements, **kwargs) + + +def get_replacement_field_conflict(parent_desc, new_desc, old_desc): + ''' + Returns a string to indicate why a field is not a valid + replacement for another field. An empty string will be + returned if there are no conflicts. + ''' + parent_type = parent_desc["TYPE"] + old_type = old_desc["TYPE"] + new_type = new_desc["TYPE"] + old_size = old_desc.get("SIZE") if old_type.is_var_size else old_type.size + new_size = new_desc.get("SIZE") if new_type.is_var_size else new_type.size + + error_str = "" + error_args = () + if old_size is None and new_size is None: + # sizes are both undefined, so the parent must be a container. + # if not, then the size of both will be calculated later, and + # that means we can't verify they match here. + if not parent_type.is_container: + error_str = "Neither field size defined in non open-ended parent" + elif None in (old_size, new_size): + # only one field is missing its size. this is a problem, as + # we can't verify they match. + error_str = "One field size could not be determined - %s vs %s" + error_args = (old_size, new_size) + elif old_size != new_size: + error_str = "Field sizes dont match - %s vs %s" + error_args = (old_size, new_size) + + return error_str % error_args + + def is_in_dir(path, directory): '''Checks if path is in directory. 
Respects symlinks.''' try: From a2a867817a2cadfe6a89aed10dc1efaa90cce583 Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Sat, 10 Feb 2024 13:47:46 -0600 Subject: [PATCH 4/8] bump version --- supyr_struct/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supyr_struct/__init__.py b/supyr_struct/__init__.py index bf58840..a304371 100644 --- a/supyr_struct/__init__.py +++ b/supyr_struct/__init__.py @@ -87,8 +87,8 @@ # ############## __author__ = "Sigmmma" # YYYY.MM.DD -__date__ = "2024.01.23" -__version__ = (1, 5, 5) +__date__ = "2024.02.10" +__version__ = (1, 5, 6) __website__ = "https://github.com/Sigmmma/supyr_struct" From d5876ca98c1d8ce206b19c5bbf5c4b91c5c49aa3 Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Tue, 20 Feb 2024 07:31:32 -0600 Subject: [PATCH 5/8] misc cleanup --- supyr_struct/buffer.py | 6 +++--- supyr_struct/defs/block_def.py | 25 ++++++++++++++----------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/supyr_struct/buffer.py b/supyr_struct/buffer.py index 6dd686c..3223187 100644 --- a/supyr_struct/buffer.py +++ b/supyr_struct/buffer.py @@ -243,7 +243,7 @@ def seek(self, pos, whence=SEEK_SET): if whence == SEEK_SET: assert pos >= 0, "Read position cannot be negative." - if pos - 1 not in range(len(self)): + if pos not in range(len(self) + 1): raise IndexError('seek position out of range') self._pos = pos @@ -251,7 +251,7 @@ def seek(self, pos, whence=SEEK_SET): pos = self._pos + pos assert pos >= 0, "Read position cannot be negative." - if pos - 1 not in range(len(self)): + if pos not in range(len(self) + 1): raise IndexError('seek position out of range') self._pos = pos @@ -259,7 +259,7 @@ def seek(self, pos, whence=SEEK_SET): pos += len(self) assert pos >= 0, "Read position cannot be negative." 
- if pos - 1 not in range(len(self)): + if pos not in range(len(self) + 1): raise IndexError('seek position out of range') self._pos = pos diff --git a/supyr_struct/defs/block_def.py b/supyr_struct/defs/block_def.py index 33c020e..cda703f 100644 --- a/supyr_struct/defs/block_def.py +++ b/supyr_struct/defs/block_def.py @@ -663,28 +663,31 @@ def set_entry_count(self, src_dict, key=None): src_dict[ENTRIES] = int_count def str_to_name(self, string, reserved_names=reserved_desc_names, **kwargs): + e_str = "" try: - if not isinstance(string, str): - self._e_str += (("ERROR: INVALID TYPE FOR NAME. EXPECTED " + + e_str += (("ERROR: INVALID TYPE FOR NAME. EXPECTED " + "%s, GOT %s.\n") % (str, type(string))) - self._bad = True - return None - sanitized_str = str_to_identifier(string) + sanitized_str = "" if e_str else str_to_identifier(string) if not sanitized_str: - self._e_str += (("ERROR: CANNOT USE '%s' AS AN ATTRIBUTE " + + e_str += (("ERROR: CANNOT USE '%s' AS AN ATTRIBUTE " + "NAME.\nWHEN SANITIZED IT BECAME ''\n\n") % string) - self._bad = True - return None elif sanitized_str in reserved_names and\ not kwargs.get('allow_reserved', False): - self._e_str += ("ERROR: CANNOT USE THE RESERVED KEYWORD " + + e_str += ("ERROR: CANNOT USE THE RESERVED KEYWORD " + "'%s' AS AN ATTRIBUTE NAME.\n\n" % string) - self._bad = True - return None + + if e_str: + if self is None: + raise ValueError(e_str) + + self._e_str = e_str + self._bad = True + sanitized_str = None + return sanitized_str except Exception: print(format_exc()) From b8aa14553154bc679130fa262186619f1a78cbfa Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Tue, 20 Feb 2024 07:32:09 -0600 Subject: [PATCH 6/8] bump version --- supyr_struct/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supyr_struct/__init__.py b/supyr_struct/__init__.py index a304371..9011e24 100644 --- a/supyr_struct/__init__.py +++ b/supyr_struct/__init__.py @@ -87,8 +87,8 @@ # ############## __author__ = "Sigmmma" 
# YYYY.MM.DD -__date__ = "2024.02.10" -__version__ = (1, 5, 6) +__date__ = "2024.02.20" +__version__ = (1, 6, 0) __website__ = "https://github.com/Sigmmma/supyr_struct" From f5eba529ee12e575352913a6cb98fc8528caba6d Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Sat, 9 Mar 2024 23:18:22 -0600 Subject: [PATCH 7/8] Sound playback --- supyr_struct/blocks/list_block.py | 9 +- supyr_struct/defs/audio/wav.py | 201 ++++++++++++++++++++++++++---- 2 files changed, 185 insertions(+), 25 deletions(-) diff --git a/supyr_struct/blocks/list_block.py b/supyr_struct/blocks/list_block.py index 8706f96..66a5d6f 100644 --- a/supyr_struct/blocks/list_block.py +++ b/supyr_struct/blocks/list_block.py @@ -2,6 +2,7 @@ ''' from copy import deepcopy from sys import getsizeof +from types import MethodType from supyr_struct.blocks.block import Block from supyr_struct.defs.constants import DEF_SHOW, ALL_SHOW, SHOW_SETS,\ @@ -422,8 +423,12 @@ def index_by_id(self, node): Returns the index that node is in. Raises ValueError if node can not be found. 
''' - return [id(list.__getitem__(self, i)) for - i in range(len(self))].index(id(node)) + return list( + map(id, + # NOTE: using list.__getitem__ to maximize speed + map(MethodType(list.__getitem__, self), + range(len(self)) + ))).index(id(node)) def get_size(self, attr_index=None, **context): ''' diff --git a/supyr_struct/defs/audio/wav.py b/supyr_struct/defs/audio/wav.py index 95a50c1..a354a3f 100644 --- a/supyr_struct/defs/audio/wav.py +++ b/supyr_struct/defs/audio/wav.py @@ -11,6 +11,46 @@ "data", "fact", "PEAK", + "LIST", + "id3 ", + ) + +list_type_sigs = ( + "INFO", + "adtl", + ) + +list_info_type_sigs = ( + # taken from here: + # https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file + "IARL", # The location where the subject of the file is archived + "IART", # The artist of the original subject of the file + "ICMS", # The name of the person or organization that commissioned the original subject of the file + "ICMT", # General comments about the file or its subject + "ICOP", # Copyright information about the file (e.g., "Copyright Some Company 2011") + "ICRD", # The date the subject of the file was created (creation date) (e.g., "2022-12-31") + "ICRP", # Whether and how an image was cropped + "IDIM", # The dimensions of the original subject of the file + "IDPI", # Dots per inch settings used to digitize the file + "IENG", # The name of the engineer who worked on the file + "IGNR", # The genre of the subject + "IKEY", # A list of keywords for the file or its subject + "ILGT", # Lightness settings used to digitize the file + "IMED", # Medium for the original subject of the file + "INAM", # Title of the subject of the file (name) + "IPLT", # The number of colors in the color palette used to digitize the file + "IPRD", # Name of the title the subject was originally intended for + "ISBJ", # Description of the contents of the file (subject) + "ISFT", # Name of the software package used to create the file + "ISRC", # The name of the person or organization 
that supplied the original subject of the file + "ISRF", # The original form of the material that was digitized (source form) + "ITCH", # The name of the technician who digitized the subject file + ) + +list_adtl_type_sigs = ( + "labl", + "note", + "ltxt", ) wav_formats = ( @@ -38,6 +78,19 @@ def fmt_extra_data_size(parent=None, new_value=None, *args, **kwargs): parent.length = new_value + 16 +def get_set_chunk_size(parent=None, new_value=None, *args, **kwargs): + if parent is None: + return 0 + if new_value is None: + return ((parent.data_size+3)//4)*4 + + parent.data_size = ((new_value+3)//4)*4 + +def get_list_chunk_size(list_data): + return 4 + sum( + 8 + get_set_chunk_size(parent=p) for p in list_data + ) + def has_next_chunk(rawdata=None, **kwargs): try: data = rawdata.peek(8) @@ -49,6 +102,18 @@ def has_next_chunk(rawdata=None, **kwargs): except AttributeError: return False +def has_next_list_sub_chunk(parent=None, rawdata=None, **kwargs): + if None in (parent, rawdata): + return False + + try: + return ( + get_set_chunk_size(parent=parent.parent) > + get_list_chunk_size(parent.parent.list_data) + ) + except Exception: + pass + def get_chunk_type(rawdata=None, **kwargs): try: data = rawdata.peek(4) @@ -57,7 +122,6 @@ def get_chunk_type(rawdata=None, **kwargs): except AttributeError: pass - def chunk_extra_data_size(parent=None, rawdata=None, new_value=None, extra_size=0, **kwargs): if new_value is None: @@ -85,39 +149,64 @@ def peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None, parent=parent, rawdata=rawdata, new_value=new_value, extra_size=8 + 8 * channel_count, **kwargs) +def read_write_id3_data_size( + parent=None, writebuffer=None, rawdata=None, offset=0, root_offset=0, **kwargs + ): + buffer = writebuffer if rawdata is None else rawdata + if not parent or buffer is None: + return -peak_position = QStruct("peak_position", - Float("value"), - UInt32("position"), - ) + try: + buffer.seek(offset + root_offset) + + # it's weird, but here's 
how they define the size: + # The ID3 tag size is encoded with four bytes where the first bit (bit 7) + # is set to zero in every byte, making a total of 28 bits. The zeroed bits + # are ignored, so a 257 bytes long tag is represented as $00 00 02 01. + if writebuffer is not None: + buffer.write(bytes( + (size >> (7 * (3 - i))) & 0x7F + for i, val in enumerate([parent.frame_data_size] * 4) + )) + else: + parent.frame_data_size = sum( + (b & 0x7F) << 8*i + for i, b in enumerate(buffer.read(4)) + ) -chunk_sig_enum = UEnum32("sig", - *((fourcc, fourcc[::-1]) for fourcc in chunk_sigs), - EDITABLE=False - ) + return offset + 4 + except Exception: + pass + +def Chunk(name, all_sigs, sig_default, *fields, **desc): + return Container(name, + UEnum32("sig", + *((sig, sig[::-1]) for sig in all_sigs), + DEFAULT=sig_default[::-1] + ), + UInt32("data_size", EDITABLE=False), + *fields, + **desc + ) -unknown_chunk = Container("unknown_chunk", - UEnum32("sig", INCLUDE=chunk_sig_enum), - UInt32("data_size", EDITABLE=False), +unknown_chunk = Chunk("unknown_chunk", + chunk_sigs, '\x00\x00\x00\x00', BytesRaw("data", SIZE=chunk_extra_data_size) ) -data_chunk = Container("data_chunk", - UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="atad"), - UInt32("data_size", EDITABLE=False), +data_chunk = Chunk("data_chunk", + chunk_sigs, 'data', BytesRaw("data", SIZE=chunk_extra_data_size) ) -fact_chunk = Container("fact_chunk", - UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="tcaf"), - UInt32("data_size", DEFAULT=4, EDITABLE=False), - UInt32("sample_count"), - BytesRaw("data", SIZE=fact_chunk_extra_data_size, VISIBLE=False) + +peak_position = QStruct("peak_position", + Float("value"), + UInt32("position"), ) -peak_chunk = Container("peak_chunk", - UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="KAEP"), - UInt32("data_size", EDITABLE=False), +peak_chunk = Chunk("peak", + chunk_sigs, 'PEAK', UInt32("version"), Timestamp32("timestamp"), Array("peak", @@ -128,6 +217,70 @@ def 
peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None, ) +fact_chunk = Chunk("fact", + chunk_sigs, 'fact', + UInt32("sample_count"), + BytesRaw("data", SIZE=fact_chunk_extra_data_size, VISIBLE=False) + ) + + +adtl_sub_chunk = Chunk("label", + list_adtl_type_sigs, "\x00\x00\x00\x00", + BytesRaw("data", SIZE=chunk_extra_data_size, VISIBLE=False) + ) + +info_sub_chunk = Chunk("list_info", + list_info_type_sigs, "\x00\x00\x00\x00", + StrLatin1("info", SIZE=get_set_chunk_size), + ) + +list_chunk = Chunk("list_chunk", + chunk_sigs, 'LIST', + UEnum32("list_type_sig", + *((sig, sig[::-1]) for sig in list_type_sigs) + ), + Switch("list_data", + CASE=".list_type_sig.enum_name", + CASES={ + "INFO": WhileArray("list_data", + SUB_STRUCT=info_sub_chunk, + CASE=has_next_list_sub_chunk + ), + "adtl": WhileArray("list_data", + SUB_STRUCT=adtl_sub_chunk, + CASE=has_next_list_sub_chunk + ) + } + ) + ) + + +id3_chunk = Chunk("id3_chunk", + chunk_sigs, 'id3 ', + # what the fuck is up with this spec? it's like it was written by a + # sanitarium patient. Look at the comment in read_write_id3_data_size + # to get an idea. anyway, the spec is here: + # https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.2.html#id3v2-header + UInt24("id3_sig", DEFAULT="ID3", EDITABLE=False, ENDIAN=">"), + UInt8("version"), + UInt8("revision"), + Bool8("flags", + "uses_unsynchronisation", + "uses_compression", + ), + # okay so, this value was designed by essentially a skooma + # eater, so you'll have to bear with how it's calculated. + WritableComputed("frame_data_size", + COMPUTE_READ=read_write_id3_data_size, + COMPUTE_WRITE=read_write_id3_data_size, + SIZE=4, EDITABLE=False, MAX=((1<<27) - 1) + ), + # yeah so, the frame data spec is even more weird. we're not gonna bother + # trying to parse it, and instead just read it as a byte string. 
+ BytesRaw("frame_data", SIZE=".frame_data_size", MAX=((1<<27) - 1)), + ) + + wav_header = QStruct("wav_header", UInt32("riff_sig", DEFAULT="FFIR", EDITABLE=False), UInt32("filesize"), @@ -160,6 +313,8 @@ def peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None, "data": data_chunk, "fact": fact_chunk, "PEAK": peak_chunk, + "LIST": list_chunk, + "id3 ": id3_chunk, } ) From bfa98fb16160d6bea0d9b4ef09e3f8ff7dc24a0a Mon Sep 17 00:00:00 2001 From: Steven Garcia Date: Sat, 9 Mar 2024 23:18:55 -0600 Subject: [PATCH 8/8] bump version --- supyr_struct/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supyr_struct/__init__.py b/supyr_struct/__init__.py index 9011e24..ae88578 100644 --- a/supyr_struct/__init__.py +++ b/supyr_struct/__init__.py @@ -87,8 +87,8 @@ # ############## __author__ = "Sigmmma" # YYYY.MM.DD -__date__ = "2024.02.20" -__version__ = (1, 6, 0) +__date__ = "2024.03.09" +__version__ = (1, 7, 0) __website__ = "https://github.com/Sigmmma/supyr_struct"