From 4236b5dc4954042d07f119c2dc7b0885c5f72746 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Sun, 5 Apr 2026 08:54:38 +0200 Subject: [PATCH 1/7] Add `dump_symbols_segments` and `dump_symbols_references` options --- CHANGELOG.md | 1 + src/splat/scripts/split.py | 33 +++++++++++++++++++++++++++++++-- src/splat/util/options.py | 4 ++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5d3b653..bf10679a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ * Fix data-only splits missing the initial `macro.inc` include. * Make sure all plain text file read and writes have an explicit UTF-8 encoding. +* Add `dump_symbols_segments` and `dump_symbols_references` options alongside `dump_symbols`, adding columns to the `splat_symbols.csv` file. ### 0.39.0 diff --git a/src/splat/scripts/split.py b/src/splat/scripts/split.py index 98191acf..54584a90 100644 --- a/src/splat/scripts/split.py +++ b/src/splat/scripts/split.py @@ -523,8 +523,13 @@ def dump_symbols() -> None: with open(splat_hidden_folder / "splat_symbols.csv", "w", encoding="utf-8") as f: f.write( - "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract\n" + "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract" ) + if options.opts.dump_symbols_segments: + f.write(",segment,subsegment") + if options.opts.dump_symbols_references: + f.write(",referenced_by") + f.write("\n") for s in sorted(symbols.all_symbols, key=lambda x: x.vram_start): f.write(f"{s.vram_start:X},{s.given_name},{s.name},{s.type},") if s.given_size is not None: @@ -536,7 +541,31 @@ def dump_symbols() -> None: f.write(f"0x{s.rom:X},") else: f.write("None,") - f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}\n") + f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}") + if options.opts.dump_symbols_segments: + if s.segment is not None: + f.write(f",{s.segment.name}") + if 
hasattr(s.segment, "get_subsegment_for_ram"): + subsegment = s.segment.get_subsegment_for_ram(s.vram_start) + if subsegment is not None: + f.write(f",{subsegment.name}") + else: + f.write(",None") + else: + f.write(",None") + else: + f.write(",None,None") + if options.opts.dump_symbols_references: + f.write(",") + cs = symbols.spim_context.globalSegment.getSymbol(s.vram_start) + if cs is not None: + f.write( + "|".join( + _rcs.getName() + for _rcs in cs.referenceSymbols | cs.referenceFunctions + ) + ) + f.write("\n") symbols.spim_context.saveContextToFile(splat_hidden_folder / "spim_context.csv") diff --git a/src/splat/util/options.py b/src/splat/util/options.py index 27089d51..e645a4a8 100644 --- a/src/splat/util/options.py +++ b/src/splat/util/options.py @@ -12,6 +12,8 @@ class SplatOpts: # Debug / logging verbose: bool dump_symbols: bool + dump_symbols_segments: bool + dump_symbols_references: bool modes: List[str] # Project configuration @@ -448,6 +450,8 @@ def parse_include_asm_macro_style() -> Literal["default", "maspsx_hack"]: ret = SplatOpts( verbose=verbose, dump_symbols=p.parse_opt("dump_symbols", bool, False), + dump_symbols_segments=p.parse_opt("dump_symbols_segments", bool, False), + dump_symbols_references=p.parse_opt("dump_symbols_references", bool, False), modes=modes, base_path=base_path, target_path=p.parse_path(base_path, "target_path"), From ff09f9927394aa3f24794c7bca9c340a6a4139a3 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Wed, 13 May 2026 02:46:54 +0200 Subject: [PATCH 2/7] add subsegment_type column --- src/splat/scripts/split.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/splat/scripts/split.py b/src/splat/scripts/split.py index 54584a90..4043e171 100644 --- a/src/splat/scripts/split.py +++ b/src/splat/scripts/split.py @@ -526,7 +526,7 @@ def dump_symbols() -> None: "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract" ) if options.opts.dump_symbols_segments: 
- f.write(",segment,subsegment") + f.write(",segment,subsegment,subsegment_type") if options.opts.dump_symbols_references: f.write(",referenced_by") f.write("\n") @@ -548,13 +548,13 @@ def dump_symbols() -> None: if hasattr(s.segment, "get_subsegment_for_ram"): subsegment = s.segment.get_subsegment_for_ram(s.vram_start) if subsegment is not None: - f.write(f",{subsegment.name}") + f.write(f",{subsegment.name},{subsegment.type}") else: - f.write(",None") + f.write(",None,None") else: - f.write(",None") + f.write(",None,None") else: - f.write(",None,None") + f.write(",None,None,None") if options.opts.dump_symbols_references: f.write(",") cs = symbols.spim_context.globalSegment.getSymbol(s.vram_start) From 70dad647e9c2491833a11755f73a11c0c50bf5e1 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Tue, 19 May 2026 14:40:31 +0200 Subject: [PATCH 3/7] fix changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf10679a..e646b5c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # splat Release Notes +### 0.40.2 + +* Add `dump_symbols_segments` and `dump_symbols_references` options alongside `dump_symbols`, adding columns to the `splat_symbols.csv` file. + ### 0.40.1 * Always write the link dependency file. @@ -19,7 +23,6 @@ * Fix data-only splits missing the initial `macro.inc` include. * Make sure all plain text file read and writes have an explicit UTF-8 encoding. -* Add `dump_symbols_segments` and `dump_symbols_references` options alongside `dump_symbols`, adding columns to the `splat_symbols.csv` file. 
### 0.39.0 From b8dbfe45579dc3082a50751175895baac3f4a064 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Thu, 21 May 2026 05:51:28 +0200 Subject: [PATCH 4/7] yeet dump_symbols_segments option --- CHANGELOG.md | 3 ++- src/splat/scripts/split.py | 25 +++++++++++-------------- src/splat/util/options.py | 2 -- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e646b5c1..4d46f1d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ### 0.40.2 -* Add `dump_symbols_segments` and `dump_symbols_references` options alongside `dump_symbols`, adding columns to the `splat_symbols.csv` file. +* Expand `dump_symbols` output, also providing segment and subsegment information per symbol. +* Add `dump_symbols_references` option alongside `dump_symbols`, adding a `referenced_by` column to the `splat_symbols.csv` file. ### 0.40.1 diff --git a/src/splat/scripts/split.py b/src/splat/scripts/split.py index 4043e171..9c9ec7fc 100644 --- a/src/splat/scripts/split.py +++ b/src/splat/scripts/split.py @@ -524,9 +524,8 @@ def dump_symbols() -> None: with open(splat_hidden_folder / "splat_symbols.csv", "w", encoding="utf-8") as f: f.write( "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract" + ",segment,subsegment,subsegment_type" ) - if options.opts.dump_symbols_segments: - f.write(",segment,subsegment,subsegment_type") if options.opts.dump_symbols_references: f.write(",referenced_by") f.write("\n") @@ -542,19 +541,17 @@ def dump_symbols() -> None: else: f.write("None,") f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}") - if options.opts.dump_symbols_segments: - if s.segment is not None: - f.write(f",{s.segment.name}") - if hasattr(s.segment, "get_subsegment_for_ram"): - subsegment = s.segment.get_subsegment_for_ram(s.vram_start) - if subsegment is not None: - f.write(f",{subsegment.name},{subsegment.type}") - else: - f.write(",None,None") - else: - f.write(",None,None") + if 
s.segment is not None: + f.write(f",{s.segment.name}") + subsegment = None + if hasattr(s.segment, "get_subsegment_for_ram"): + subsegment = s.segment.get_subsegment_for_ram(s.vram_start) + if subsegment is not None: + f.write(f",{subsegment.name},{subsegment.type}") else: - f.write(",None,None,None") + f.write(",None,None") + else: + f.write(",None,None,None") if options.opts.dump_symbols_references: f.write(",") cs = symbols.spim_context.globalSegment.getSymbol(s.vram_start) diff --git a/src/splat/util/options.py b/src/splat/util/options.py index e645a4a8..7ac99dfa 100644 --- a/src/splat/util/options.py +++ b/src/splat/util/options.py @@ -12,7 +12,6 @@ class SplatOpts: # Debug / logging verbose: bool dump_symbols: bool - dump_symbols_segments: bool dump_symbols_references: bool modes: List[str] @@ -450,7 +449,6 @@ def parse_include_asm_macro_style() -> Literal["default", "maspsx_hack"]: ret = SplatOpts( verbose=verbose, dump_symbols=p.parse_opt("dump_symbols", bool, False), - dump_symbols_segments=p.parse_opt("dump_symbols_segments", bool, False), dump_symbols_references=p.parse_opt("dump_symbols_references", bool, False), modes=modes, base_path=base_path, From 43a150d4f6bf57010faa8d342c54e415a71c597d Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Thu, 21 May 2026 06:15:40 +0200 Subject: [PATCH 5/7] add docs to docs/Configuration.md --- docs/Configuration.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/Configuration.md b/docs/Configuration.md index 556d391a..83a4528c 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -1,5 +1,43 @@ Splat has various options for configuration, all of which are listed under the `options` section of the yaml file. +## Debug / logging + +### dump_symbols + +Whether to write a symbols dump after splitting. + +The output is composed of csv files in a `.splat` folder relative to the [`base_path`](#base_path). + +The columns of the csv files may change at any point. 
If you write a tool depending on this output, it is recommended in particular not to make assumptions about the ordering or number of columns (typically, new columns may be added). For example, with Python, using `csv.DictReader` is recommended. + +#### Usage + +```yaml +dump_symbols: true +``` + +#### Default + +`false` + +### dump_symbols_references + +Whether to also include symbol references (which symbols reference which symbols) in the symbols dump. + +This option adds a `referenced_by` column to `.splat/splat_symbols.csv`. The value for the column is a `|`-separated list of symbol names that reference the symbol of the current csv line. An empty value indicates no reference to that symbol was found. + +This option is only useful in conjunction with [`dump_symbols`](#dump_symbols) being `true` and does nothing otherwise. + +#### Usage + +```yaml +dump_symbols_references: true +``` + +#### Default + +`false` + ## Project configuration ### base_path From 3a5f9a88bf59a9885cca27a8bc4bf4557c403cea Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Thu, 21 May 2026 06:47:40 +0200 Subject: [PATCH 6/7] docs: Add "Visualizing the relationships between symbols" section to Advanced.md --- docs/Advanced.md | 224 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) diff --git a/docs/Advanced.md b/docs/Advanced.md index fe9b4974..c82cbac6 100644 --- a/docs/Advanced.md +++ b/docs/Advanced.md @@ -5,3 +5,227 @@ The following list contains examples of custom segments: - [RNC](https://github.com/mkst/sssv/blob/master/tools/splat_ext/rnc.py) - [Vtx](https://github.com/mkst/sssv/blob/master/tools/splat_ext/sssv_vtx.py) - [Multiple](https://github.com/pmret/papermario/tree/main/tools/splat_ext) + +## Visualizing the relationships between symbols + +This section describes how to graph relationships between symbols inside a splat segment, which can help to split sections and also pair them together (in particular, splitting and pairing asm and rodata 
sections). + +Prerequisite: enable the [`dump_symbols`](Configuration.md#dump_symbols) and [`dump_symbols_references`](Configuration.md#dump_symbols_references) options, and run `splat split`. + +You can then parse `.splat/splat_symbols.csv` and use it to gain insights on how the sections are laid out. + +For example, take the following script: + +
#!/usr/bin/env python3
# SPDX-License-Identifier: CC0-1.0
# graph_cross_sections_refs.py
"""Graph the cross-section symbol references of one splat segment.

Reads ``.splat/splat_symbols.csv`` (relative to the current directory) and
prints a Graphviz ``digraph`` on stdout.  Symbols are laid out in one column
per section (text, data, rodata, bss) and clustered by subsegment; an edge is
drawn from a referencing symbol to the symbol it references when the two live
in different sections of the chosen segment.

Usage::

    ./graph_cross_sections_refs.py my_segment --section rodata > my_segment.dot
"""

import argparse
import collections
import csv
import dataclasses
import sys
from typing import Optional

SYMBOLS_CSV_PATH = ".splat/splat_symbols.csv"

# CSV columns this script reads; any other column is ignored.
REQUIRED_COLUMNS = frozenset(
    {
        "vram_start",
        "name",
        "type",
        "segment",
        "subsegment",
        "subsegment_type",
        "referenced_by",
    }
)

# Section each handled subsegment type is drawn in.
SECTION_BY_SUBSEGMENT_TYPE = {
    "asm": "text",
    "c": "text",
    "textbin": "text",
    "hasm": "text",
    "data": "data",
    "rodata": "rodata",
    ".rodata": "rodata",
    "bss": "bss",
}

# Horizontal distance between two section columns.
COLUMN_WIDTH = 10
X_BY_SECTION = {
    "text": 0 * COLUMN_WIDTH,
    "data": 1 * COLUMN_WIDTH,
    "rodata": 2 * COLUMN_WIDTH,
    "bss": 3 * COLUMN_WIDTH,
}

# Symbol types that are never drawn as nodes.
HIDDEN_SYMBOL_TYPES = frozenset({"label", "jtbl_label"})


@dataclasses.dataclass(frozen=True)
class Sym:
    """One row of ``splat_symbols.csv``, reduced to the columns used here."""

    vram_start: int
    name: str
    type: str
    segment: str
    subsegment: str
    subsegment_type: str
    referenced_by: tuple[str, ...]


def _dot_id(text: str) -> str:
    """Return *text* as a double-quoted DOT identifier.

    Unquoted DOT identifiers may only contain letters, digits and
    underscores, which path-like subsegment names do not satisfy.
    """
    return '"' + text.replace('"', '\\"') + '"'


def load_symbols(csv_path: str = SYMBOLS_CSV_PATH) -> list[Sym]:
    """Parse the symbols dump at *csv_path* into a list of ``Sym``.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a required column is missing (for instance because
            ``dump_symbols_references`` was not enabled) or a ``vram_start``
            value is not hexadecimal.
    """
    # newline="" is what the csv module asks for; the dump is written as UTF-8.
    with open(csv_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        missing = REQUIRED_COLUMNS - set(reader.fieldnames or ())
        if missing:
            raise ValueError(
                f"{csv_path}: missing column(s) {', '.join(sorted(missing))};"
                " enable the dump_symbols and dump_symbols_references options"
                " and run splat split again"
            )
        syms = []
        for row in reader:
            refs = row["referenced_by"]
            syms.append(
                Sym(
                    int(row["vram_start"], 16),
                    row["name"],
                    row["type"],
                    row["segment"],
                    row["subsegment"],
                    row["subsegment_type"],
                    # An empty cell means "no known reference", not one
                    # reference with an empty name.
                    tuple(refs.split("|")) if refs else (),
                )
            )
    return syms


def group_by_section(syms: list[Sym], segment: str) -> dict[str, list[Sym]]:
    """Group the symbols of *segment* by section, keeping input order.

    Symbols whose subsegment type has no entry in
    ``SECTION_BY_SUBSEGMENT_TYPE`` (including the literal ``None`` written
    for symbols without a subsegment) are skipped; one warning per such type
    is printed on stderr.
    """
    grouped: dict[str, list[Sym]] = {}
    skipped: collections.Counter = collections.Counter()
    for sym in syms:
        if sym.segment != segment:
            continue
        section = SECTION_BY_SUBSEGMENT_TYPE.get(sym.subsegment_type)
        if section is None:
            skipped[sym.subsegment_type] += 1
            continue
        grouped.setdefault(section, []).append(sym)
    for subsegment_type, count in sorted(skipped.items()):
        print(
            f"warning: skipping {count} symbol(s) with unhandled"
            f" subsegment type {subsegment_type!r}",
            file=sys.stderr,
        )
    return grouped


def _render_section(
    section: str, section_syms: list[Sym], color_by_subsegment: dict[str, str]
) -> list[str]:
    """Return the DOT lines for one section column: clusters and nodes."""
    x = X_BY_SECTION[section]
    drawn = [
        sym
        for sym in sorted(section_syms, key=lambda sym: sym.vram_start)
        if sym.type not in HIDDEN_SYMBOL_TYPES
    ]
    lines: list[str] = []
    cur_subsegment = None
    # Extra vertical offset accumulated by the cluster title rows.
    dy = 0
    for i, sym in enumerate(drawn):
        # Nodes are spread evenly over 100 units, top to bottom, in vram order.
        base_y = -i / len(drawn) * 100
        if sym.subsegment != cur_subsegment:
            if cur_subsegment is not None:
                lines.append("}")
            cur_subsegment = sym.subsegment
            cluster_id = _dot_id(f"cluster_{cur_subsegment}_{section}")
            title_id = _dot_id(f"{cur_subsegment} {section}")
            title_y = base_y + dy - 0.2
            lines.append(f"subgraph {cluster_id} {{")
            lines.append(f'{title_id} [ pos = "{x},{title_y}!" color="none" ]')
            dy -= 0.8
        y = base_y + dy
        if section == "text":
            color = color_by_subsegment[cur_subsegment]
        elif section == "rodata" and sym.type == "jtbl":
            color = "magenta"
        else:
            color = None
        color_attr = f' color="{color}"' if color is not None else ""
        lines.append(f'{_dot_id(sym.name)} [ pos = "{x},{y}!"{color_attr} ]')
    if drawn:
        # Close the last cluster opened above.
        lines.append("}")
    return lines


def _render_edges(
    syms_by_section: dict[str, list[Sym]], color_by_subsegment: dict[str, str]
) -> list[str]:
    """Return the DOT lines for the cross-section reference edges."""
    # Only symbols of the displayed sections can be the source of an edge;
    # looking them up here (rather than in the whole CSV) keeps a same-named
    # symbol of another segment from supplying the edge colour.
    origin_by_name = {
        sym.name: (section, sym.subsegment)
        for section, section_syms in syms_by_section.items()
        for sym in section_syms
    }
    lines: list[str] = []
    for section, section_syms in syms_by_section.items():
        for sym in section_syms:
            for ref_name in sym.referenced_by:
                origin = origin_by_name.get(ref_name)
                if origin is None:
                    # reference from outside the segment / displayed sections
                    continue
                ref_section, ref_subsegment = origin
                if ref_section == section:
                    # same-section reference
                    continue
                # Only show references from text, or from data to rodata.
                if ref_section == "text" or (
                    ref_section == "data" and section == "rodata"
                ):
                    color = color_by_subsegment.get(ref_subsegment, "black")
                    lines.append(
                        f"{_dot_id(ref_name)} -> {_dot_id(sym.name)}"
                        f' [ color = "{color}" ]'
                    )
    return lines


def render_graph(
    syms: list[Sym], segment: str, sections: Optional[list[str]] = None
) -> list[str]:
    """Return the DOT source, one line per item, for *segment*.

    Args:
        syms: every symbol of the dump (other segments are filtered out).
        segment: name of the splat segment to graph.
        sections: when given and non-empty, only ``text`` and these sections
            are shown.
    """
    syms_by_section = group_by_section(syms, segment)

    # Colours are assigned per text subsegment, in name order, before any
    # section filtering. A segment without text symbols simply gets none.
    text_subsegments = sorted(
        {sym.subsegment for sym in syms_by_section.get("text", ())}
    )
    color_by_subsegment = {
        subsegment: f"{(index * 0.7) % 1} 1 1"
        for index, subsegment in enumerate(text_subsegments)
    }

    if sections:
        syms_by_section = {
            section: section_syms
            for section, section_syms in syms_by_section.items()
            if section == "text" or section in sections
        }

    lines = ["digraph {"]
    for section, section_syms in syms_by_section.items():
        lines.extend(_render_section(section, section_syms, color_by_subsegment))
    lines.extend(_render_edges(syms_by_section, color_by_subsegment))
    lines.append("}")
    return lines


def main(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point; returns the process exit status."""
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("segment")
    parser.add_argument(
        "--section",
        nargs="+",
        help=(
            "only show this section besides text,"
            " eg --section rodata will only show text and rodata"
        ),
    )
    # Parse arguments before touching the CSV so that --help always works.
    args = parser.parse_args(argv)

    try:
        syms = load_symbols()
    except (OSError, ValueError) as e:
        print(f"error: {e}", file=sys.stderr)
        return 1

    if not any(sym.segment == args.segment for sym in syms):
        print(
            f"error: no symbol belongs to segment {args.segment!r}"
            f" in {SYMBOLS_CSV_PATH}",
            file=sys.stderr,
        )
        return 1

    for line in render_graph(syms, args.segment, args.section):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
+ +This script takes as input the name of a splat segment, and produces a graph in dot language. +Optionally, it can also be passed for example `--section rodata` to restrict the visualization to text and rodata sections. + +To render the script output, first save its output to a file, for example: + +```sh +./graph_cross_sections_refs.py my_segment --section rodata > my_segment.dot +``` + +Then use graphviz to render it to svg (for example): + +```sh +neato -Tsvg -O my_segment.dot +``` + +(on Ubuntu you can install graphviz with `apt install graphviz`) + +You can then open the `my_segment.dot.svg` file for viewing. + +The produced graph is laid out in columns: from left to right, the columns correspond to the text, data, rodata and bss sections. (note: if you passed e.g. `--section rodata` to the script, only text and rodata will be present) + +Symbols are further clustered by subsegments, indicated by black rectangles, and the name of the subsegment is indicated at the top of each cluster. +Each text subsegment is colored differently. + +A suggested workflow based on this visualization is then to +0. Pick a segment of interest +1. Run `splat split` +2. Generate and render a graph +3. Refine the segment's subsegments splits and pairing +4. Iterate from step 1 again until satisfied From 484c74dcfb3de3015ec7eda007c4a5433e43e7c7 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Thu, 21 May 2026 06:52:43 +0200 Subject: [PATCH 7/7] fixup Advanced.md --- docs/Advanced.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Advanced.md b/docs/Advanced.md index c82cbac6..761fc8da 100644 --- a/docs/Advanced.md +++ b/docs/Advanced.md @@ -224,6 +224,7 @@ Symbols are further clustered by subsegments, indicated by black rectangles, and Each text subsegment is colored differently. A suggested workflow based on this visualization is then to + 0. Pick a segment of interest 1. Run `splat split` 2. Generate and render a graph