Sigmmma · General-101 · Jan 23, 2024 · Jan 23, 2024 · Feb 10, 2024 · Feb 10, 2024
diff --git a/supyr_struct/__init__.py b/supyr_struct/__init__.py
@@ -87,8 +87,8 @@
 # ##############
 __author__ = "Sigmmma"
 #           YYYY.MM.DD
-__date__ = "2020.10.30"
-__version__ = (1, 5, 4)
+__date__ = "2024.03.09"
+__version__ = (1, 7, 0)
 __website__ = "https://github.com/Sigmmma/supyr_struct"
 
 

diff --git a/supyr_struct/blocks/list_block.py b/supyr_struct/blocks/list_block.py
@@ -2,6 +2,7 @@
 '''
 from copy import deepcopy
 from sys import getsizeof
+from types import MethodType
 
 from supyr_struct.blocks.block import Block
 from supyr_struct.defs.constants import DEF_SHOW, ALL_SHOW, SHOW_SETS,\
@@ -422,8 +423,12 @@ def index_by_id(self, node):
         Returns the index that node is in.
         Raises ValueError if node can not be found.
         '''
-        return [id(list.__getitem__(self, i)) for
-                i in range(len(self))].index(id(node))
+        return list(
+            map(id,
+            # NOTE: using list.__getitem__ to maximize speed
+            map(MethodType(list.__getitem__, self),
+            range(len(self))
+            ))).index(id(node))
 
     def get_size(self, attr_index=None, **context):
         '''

diff --git a/supyr_struct/blocks/union_block.py b/supyr_struct/blocks/union_block.py
@@ -387,7 +387,7 @@ def flush(self):
             # If they are smaller, some of the most significant bytes
             # arent used, which in big endian are the first bytes.
             u_type.serializer(u_node, self, None, self, 0,
-                              desc.get(SIZE) - u_desc.get(SIZE))
+                              desc.get(SIZE, 0) - u_desc.get(SIZE, u_type.size))
         else:
             u_type.serializer(u_node, self, None, self)
 

diff --git a/supyr_struct/buffer.py b/supyr_struct/buffer.py
@@ -243,23 +243,23 @@ def seek(self, pos, whence=SEEK_SET):
         if whence == SEEK_SET:
             assert pos >= 0, "Read position cannot be negative."
 
-            if pos - 1 not in range(len(self)):
+            if pos not in range(len(self) + 1):
                 raise IndexError('seek position out of range')
 
             self._pos = pos
         elif whence == SEEK_CUR:
             pos = self._pos + pos
             assert pos >= 0, "Read position cannot be negative."
 
-            if pos - 1 not in range(len(self)):
+            if pos not in range(len(self) + 1):
                 raise IndexError('seek position out of range')
 
             self._pos = pos
         elif whence == SEEK_END:
             pos += len(self)
             assert pos >= 0, "Read position cannot be negative."
 
-            if pos - 1 not in range(len(self)):
+            if pos not in range(len(self) + 1):
                 raise IndexError('seek position out of range')
 
             self._pos = pos
@@ -287,6 +287,9 @@ class BytearrayBuffer(bytearray, Buffer):
     Uses os.SEEK_SET, os.SEEK_CUR, and os.SEEK_END when calling seek.
     '''
     __slots__ = ('_pos',)
+    def __init__(self, *args):
+        bytearray.__init__(self, *args)  # initialize the underlying bytes
+        Buffer.__init__(self, *args)  # NOTE(review): presumably sets _pos; confirm
 
     def peek(self, count=None, offset=None):
         '''

diff --git a/supyr_struct/defs/audio/wav.py b/supyr_struct/defs/audio/wav.py
@@ -11,6 +11,46 @@
     "data",
     "fact",
     "PEAK",
+    "LIST",
+    "id3 ",
+    )
+
+list_type_sigs = (
+    "INFO",
+    "adtl",
+    )
+
+list_info_type_sigs = (
+    # taken from here:
+    #   https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file
+    "IARL", # The location where the subject of the file is archived
+    "IART", # The artist of the original subject of the file
+    "ICMS", # The name of the person or organization that commissioned the original subject of the file
+    "ICMT", # General comments about the file or its subject
+    "ICOP", # Copyright information about the file (e.g., "Copyright Some Company 2011")
+    "ICRD", # The date the subject of the file was created (creation date) (e.g., "2022-12-31")
+    "ICRP", # Whether and how an image was cropped
+    "IDIM", # The dimensions of the original subject of the file
+    "IDPI", # Dots per inch settings used to digitize the file
+    "IENG", # The name of the engineer who worked on the file
+    "IGNR", # The genre of the subject
+    "IKEY", # A list of keywords for the file or its subject
+    "ILGT", # Lightness settings used to digitize the file
+    "IMED", # Medium for the original subject of the file
+    "INAM", # Title of the subject of the file (name)
+    "IPLT", # The number of colors in the color palette used to digitize the file
+    "IPRD", # Name of the title the subject was originally intended for
+    "ISBJ", # Description of the contents of the file (subject)
+    "ISFT", # Name of the software package used to create the file
+    "ISRC", # The name of the person or organization that supplied the original subject of the file
+    "ISRF", # The original form of the material that was digitized (source form)
+    "ITCH", # The name of the technician who digitized the subject file
+    )
+
+list_adtl_type_sigs = (
+    "labl",
+    "note",
+    "ltxt",
     )
 
 wav_formats = (
@@ -38,6 +78,19 @@ def fmt_extra_data_size(parent=None, new_value=None, *args, **kwargs):
 
     parent.length = new_value + 16
 
+def get_set_chunk_size(parent=None, new_value=None, *args, **kwargs):
+    # getter/setter for parent.data_size, rounded up to a multiple of 4
+    if parent is None:
+        return 0
+    elif new_value is None:
+        return (parent.data_size + 3) // 4 * 4
+    parent.data_size = (new_value + 3) // 4 * 4
+
+def get_list_chunk_size(list_data):
+    # 4 bytes for the list type sig, plus an 8 byte header and the
+    # rounded up data size of every sub-chunk in list_data
+    return 4 + sum(8 + get_set_chunk_size(parent=p) for p in list_data)
+
 def has_next_chunk(rawdata=None, **kwargs):
     try:
         data = rawdata.peek(8)
@@ -49,6 +102,18 @@ def has_next_chunk(rawdata=None, **kwargs):
     except AttributeError:
         return False
 
+def has_next_list_sub_chunk(parent=None, rawdata=None, **kwargs):
+    # True while the owning chunk's padded data size is larger than the
+    # combined size of the sub-chunks currently in its list_data.
+    if parent is None or rawdata is None:
+        return False
+    try:
+        owner = parent.parent
+        return get_set_chunk_size(parent=owner) > get_list_chunk_size(
+            owner.list_data)
+    except Exception:
+        return False
+
 def get_chunk_type(rawdata=None, **kwargs):
     try:
         data = rawdata.peek(4)
@@ -57,7 +122,6 @@ def get_chunk_type(rawdata=None, **kwargs):
     except AttributeError:
         pass
 
-
 def chunk_extra_data_size(parent=None, rawdata=None, new_value=None,
                           extra_size=0, **kwargs):
     if new_value is None:
@@ -85,39 +149,64 @@ def peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None,
         parent=parent, rawdata=rawdata, new_value=new_value,
         extra_size=8 + 8 * channel_count, **kwargs)
 
+def read_write_id3_data_size(
+        parent=None, writebuffer=None, rawdata=None, offset=0, root_offset=0, **kwargs
+        ):
+    buffer = writebuffer if rawdata is None else rawdata
+    if not parent or buffer is None:
+        return
 
-peak_position = QStruct("peak_position",
-    Float("value"),
-    UInt32("position"),
-    )
+    try:
+        buffer.seek(offset + root_offset)
+
+        # Reads (rawdata) or writes (writebuffer) the ID3 tag size, stored
+        #   as a 4 byte big endian "syncsafe" integer: the first bit (bit 7)
+        #   of every byte is zero, leaving 7 bits per byte and 28 in total.
+        #   So a 257 bytes long tag is represented as $00 00 02 01.
+        if writebuffer is not None:
+            buffer.write(bytes(
+                (val >> (7 * (3 - i))) & 0x7F
+                for i, val in enumerate([parent.frame_data_size] * 4)
+                ))
+        else:
+            parent.frame_data_size = sum(
+                (b & 0x7F) << (7 * (3 - i))
+                for i, b in enumerate(buffer.read(4))
+                )
 
-chunk_sig_enum = UEnum32("sig",
-    *((fourcc, fourcc[::-1]) for fourcc in chunk_sigs),
-    EDITABLE=False
-    )
+        return offset + 4
+    except Exception:
+        pass
+
+def Chunk(name, all_sigs, sig_default, *fields, **desc):
+    # every chunk starts with a sig enum (values are the reversed sig
+    # strings) and a data size, followed by the chunk specific fields
+    sig_options = tuple((fourcc, fourcc[::-1]) for fourcc in all_sigs)
+    sig_enum = UEnum32("sig", *sig_options, DEFAULT=sig_default[::-1])
+    return Container(name,
+        sig_enum, UInt32("data_size", EDITABLE=False),
+        *fields,
+        **desc
+        )
 
-unknown_chunk = Container("unknown_chunk",
-    UEnum32("sig", INCLUDE=chunk_sig_enum),
-    UInt32("data_size", EDITABLE=False),
+unknown_chunk = Chunk("unknown_chunk",
+    chunk_sigs, '\x00\x00\x00\x00',
     BytesRaw("data", SIZE=chunk_extra_data_size)
     )
 
-data_chunk = Container("data_chunk",
-    UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="atad"),
-    UInt32("data_size", EDITABLE=False),
+data_chunk = Chunk("data_chunk",
+    chunk_sigs, 'data',
     BytesRaw("data", SIZE=chunk_extra_data_size)
     )
 
-fact_chunk = Container("fact_chunk",
-    UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="tcaf"),
-    UInt32("data_size", DEFAULT=4, EDITABLE=False),
-    UInt32("sample_count"),
-    BytesRaw("data", SIZE=fact_chunk_extra_data_size, VISIBLE=False)
+
+peak_position = QStruct("peak_position",
+    Float("value"),
+    UInt32("position"),
     )
 
-peak_chunk = Container("peak_chunk",
-    UEnum32("sig", INCLUDE=chunk_sig_enum, DEFAULT="KAEP"),
-    UInt32("data_size", EDITABLE=False),
+peak_chunk = Chunk("peak",
+    chunk_sigs, 'PEAK',
     UInt32("version"),
     Timestamp32("timestamp"),
     Array("peak",
@@ -128,6 +217,70 @@ def peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None,
     )
 
 
+fact_chunk = Chunk("fact",
+    chunk_sigs, 'fact',
+    UInt32("sample_count"),
+    BytesRaw("data", SIZE=fact_chunk_extra_data_size, VISIBLE=False)
+    )
+
+
+adtl_sub_chunk = Chunk("label",
+    list_adtl_type_sigs, "\x00\x00\x00\x00",
+    BytesRaw("data", SIZE=chunk_extra_data_size, VISIBLE=False)
+    )
+
+info_sub_chunk = Chunk("list_info",
+    list_info_type_sigs, "\x00\x00\x00\x00",
+    StrLatin1("info", SIZE=get_set_chunk_size),
+    )
+
+list_chunk = Chunk("list_chunk",
+    chunk_sigs, 'LIST',
+    UEnum32("list_type_sig", 
+        *((sig, sig[::-1]) for sig in list_type_sigs)
+        ),
+    Switch("list_data",
+        CASE=".list_type_sig.enum_name",
+        CASES={
+            "INFO": WhileArray("list_data", 
+                SUB_STRUCT=info_sub_chunk, 
+                CASE=has_next_list_sub_chunk
+                ),
+            "adtl": WhileArray("list_data", 
+                SUB_STRUCT=adtl_sub_chunk, 
+                CASE=has_next_list_sub_chunk
+                )
+            }
+        )
+    )
+
+
+id3_chunk = Chunk("id3_chunk",
+    chunk_sigs, 'id3 ',
+    # An embedded ID3v2 tag. Only its 10 byte header is parsed; the size
+    # encoding is described in read_write_id3_data_size. The spec (below)
+    # puts both flags in the top bits; NOTE(review): confirm Bool8 does too.
+    #   https://mutagen-specs.readthedocs.io/en/latest/id3/id3v2.2.html#id3v2-header
+    UInt24("id3_sig", DEFAULT="ID3", EDITABLE=False, ENDIAN=">"),
+    UInt8("version"),
+    UInt8("revision"),
+    Bool8("flags",
+        "uses_unsynchronisation",
+        "uses_compression",
+        ),
+    # The size is a 28 bit value packed 7 bits per byte, so it needs a
+    # custom reader/writer rather than a plain integer field.
+    WritableComputed("frame_data_size",
+        COMPUTE_READ=read_write_id3_data_size,
+        COMPUTE_WRITE=read_write_id3_data_size,
+        SIZE=4, EDITABLE=False, MAX=((1<<28) - 1)
+        ),
+    # The frames themselves are not parsed; they are read as a single
+    # byte string.
+    BytesRaw("frame_data", SIZE=".frame_data_size", MAX=((1<<28) - 1)),
+    )
+
+
 wav_header = QStruct("wav_header",
     UInt32("riff_sig", DEFAULT="FFIR", EDITABLE=False),
     UInt32("filesize"),
@@ -160,6 +313,8 @@ def peak_chunk_extra_data_size(parent=None, rawdata=None, new_value=None,
         "data": data_chunk,
         "fact": fact_chunk,
         "PEAK": peak_chunk,
+        "LIST": list_chunk,
+        "id3 ": id3_chunk,
         }
     )
 

diff --git a/supyr_struct/defs/block_def.py b/supyr_struct/defs/block_def.py
@@ -663,28 +663,31 @@ def set_entry_count(self, src_dict, key=None):
             src_dict[ENTRIES] = int_count
 
     def str_to_name(self, string, reserved_names=reserved_desc_names, **kwargs):
+        e_str = ""
         try:
-
             if not isinstance(string, str):
-                self._e_str += (("ERROR: INVALID TYPE FOR NAME. EXPECTED " +
+                e_str += (("ERROR: INVALID TYPE FOR NAME. EXPECTED " +
                                  "%s, GOT %s.\n") % (str, type(string)))
-                self._bad = True
-                return None
 
-            sanitized_str = str_to_identifier(string)
+            sanitized_str = "" if e_str else str_to_identifier(string)
 
             if not sanitized_str:
-                self._e_str += (("ERROR: CANNOT USE '%s' AS AN ATTRIBUTE " +
+                e_str += (("ERROR: CANNOT USE '%s' AS AN ATTRIBUTE " +
                                  "NAME.\nWHEN SANITIZED IT BECAME ''\n\n") %
                                 string)
-                self._bad = True
-                return None
             elif sanitized_str in reserved_names and\
                  not kwargs.get('allow_reserved', False):
-                self._e_str += ("ERROR: CANNOT USE THE RESERVED KEYWORD " +
+                e_str += ("ERROR: CANNOT USE THE RESERVED KEYWORD " +
                                 "'%s' AS AN ATTRIBUTE NAME.\n\n" % string)
-                self._bad = True
-                return None
+
+            if e_str:
+                if self is None:
+                    raise ValueError(e_str)  # NOTE: caught by the except below
+
+                self._e_str  += e_str  # append so earlier errors are kept
+                self._bad     = True
+                sanitized_str = None
+
             return sanitized_str
         except Exception:
             print(format_exc())