ethteck · Dragorn421 · Apr 5, 2026 · May 13, 2026 · May 19, 2026 · May 21, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # splat Release Notes
 
+### 0.40.2
+
+* Expand `dump_symbols` output, also providing segment and subsegment information per symbol.
+* Add `dump_symbols_references` option alongside `dump_symbols`, adding a `referenced_by` column to the `splat_symbols.csv` file.
+
 ### 0.40.1
 
 * Always write the link dependency file.

diff --git a/docs/Advanced.md b/docs/Advanced.md
@@ -5,3 +5,228 @@ The following list contains examples of custom segments:
 - [RNC](https://github.com/mkst/sssv/blob/master/tools/splat_ext/rnc.py)
 - [Vtx](https://github.com/mkst/sssv/blob/master/tools/splat_ext/sssv_vtx.py)
 - [Multiple](https://github.com/pmret/papermario/tree/main/tools/splat_ext)
+
+## Visualizing the relationships between symbols
+
+This section describes how to graph relationships between symbols inside a splat segment, which can help to split sections and also pair them together (in particular, splitting and pairing asm and rodata sections).
+
+Prerequisite: enable the [`dump_symbols`](Configuration.md#dump_symbols) and [`dump_symbols_references`](Configuration.md#dump_symbols_references) options, and run `splat split`.
+
+You can then parse `.splat/splat_symbols.csv` and use it to gain insight into how the sections are laid out.
+
+For example, take the following script:
+
+<details>
+
+```py
+#!/usr/bin/env python3
+# SPDX-License-Identifier: CC0-1.0
+# graph_cross_sections_refs.py
+
+import argparse
+import csv
+import dataclasses
+
+
+@dataclasses.dataclass(frozen=True)
+class Sym:  # one row of .splat/splat_symbols.csv, reduced to the columns this script uses
+    vram_start: int  # parsed from the hexadecimal "vram_start" column
+    name: str
+    type: str  # symbol type; "label", "jtbl_label" and "jtbl" get special handling below
+    segment: str  # name of the splat segment the symbol belongs to
+    subsegment: str  # name of the subsegment, or "None" if none was found
+    subsegment_type: str  # subsegment type such as "asm", "c", "data", "rodata"
+    referenced_by: tuple[str, ...]  # names of the symbols that reference this one
+
+
+syms = list[Sym]()
+
+with open(".splat/splat_symbols.csv") as f:
+    for row in csv.DictReader(f):
+        if row["referenced_by"] == "":
+            referenced_by = []
+        else:
+            referenced_by = row["referenced_by"].split("|")
+        syms.append(
+            Sym(
+                int(row["vram_start"], 16),
+                row["name"],
+                row["type"],
+                row["segment"],
+                row["subsegment"],
+                row["subsegment_type"],
+                tuple(referenced_by),
+            )
+        )
+
+sym_by_name = {_sym.name: _sym for _sym in syms}
+
+parser = argparse.ArgumentParser()
+parser.add_argument("segment")  # name of the splat segment to graph
+parser.add_argument(
+    "--section",
+    nargs="+",  # accepts one or more section names
+    help=(
+        "only show this section besides text,"
+        " eg --section rodata will only show text and rodata"
+    ),
+)
+args = parser.parse_args()
+
+section_by_subsegment_type = {
+    "asm": "text",
+    "c": "text",
+    "textbin": "text",
+    "hasm": "text",
+    "data": "data",
+    "rodata": "rodata",
+    ".rodata": "rodata",
+    "bss": "bss",
+}
+
+syms_by_section: dict[str, list[Sym]] = {}
+for sym in syms:
+    if sym.segment != args.segment:
+        continue
+    section = section_by_subsegment_type.get(sym.subsegment_type)
+    assert section is not None, sym
+    syms_by_section.setdefault(section, []).append(sym)
+
+text_subsegments = sorted({_sym.subsegment for _sym in syms_by_section["text"]})
+color_by_subsegment: dict[str, str] = {}
+for subsegment in text_subsegments:
+    h = (len(color_by_subsegment) * 0.7) % 1
+    color_by_subsegment[subsegment] = f"{h} 1 1"
+
+if args.section:
+    for section in list(syms_by_section.keys()):
+        if section != "text" and section not in args.section:
+            del syms_by_section[section]
+
+section_by_sym_name = {
+    _sym.name: _section for _section, _syms in syms_by_section.items() for _sym in _syms
+}
+
+vram_start_by_section: dict[str, int] = {}
+for section, section_syms in syms_by_section.items():
+    vram_start_by_section[section] = min(_s.vram_start for _s in section_syms)
+
+
+colw = 10  # x distance between two adjacent section columns
+x_by_section = {  # x coordinate of each section's column, left to right
+    "text": 0 * colw,
+    "data": 1 * colw,
+    "rodata": 2 * colw,
+    "bss": 3 * colw,
+}
+
+
+def gprint(l: str) -> None:  # write one line of the dot graph to stdout
+    print(l)
+
+
+gprint("digraph {")
+
+for section, section_syms in syms_by_section.items():
+    section_vram_start = vram_start_by_section[section]
+    x = x_by_section[section]
+    filtered_syms: list[Sym] = []
+    for sym in sorted(section_syms, key=lambda sym: sym.vram_start):
+        if sym.type in {"label", "jtbl_label"}:
+            continue
+        filtered_syms.append(sym)
+    cur_subsegment = None
+    i = 0
+    dy = 0
+    for sym in filtered_syms:
+        if cur_subsegment != sym.subsegment:
+            if cur_subsegment is not None:
+                gprint("}")
+            cur_subsegment = sym.subsegment
+            gprint(f"subgraph cluster_{cur_subsegment}_{section} " "{")
+            y = -i / len(filtered_syms) * 100 + dy - 0.2
+            gprint(f'"{cur_subsegment} {section}"' " [" f' pos = "{x},{y}!"' f' color="none"' " ]")
+            dy -= 0.8
+        assert cur_subsegment is not None
+        if 0:
+            # y = vram position
+            y = -(sym.vram_start - section_vram_start) / 500
+        y = -i / len(filtered_syms) * 100 + dy
+        i += 1
+        color = None
+        if section == "text":
+            color = color_by_subsegment[cur_subsegment]
+        elif section == "rodata":
+            if sym.type == "jtbl":
+                color = "magenta"
+        gprint(
+            f'"{sym.name}"'
+            " ["
+            f' pos = "{x},{y}!"'
+            + (f' color="{color}"' if color is not None else "")
+            + " ]"
+        )
+    if cur_subsegment is not None:
+        gprint("}")
+
+for section, section_syms in syms_by_section.items():
+    for sym in section_syms:
+        for sym_ref_by in sym.referenced_by:
+            if (
+                # ignore references from outside the segment
+                sym_ref_by in section_by_sym_name
+                # ignore same-section references
+                and section_by_sym_name[sym_ref_by] != section
+                # only show
+                and (
+                    # references from text
+                    section_by_sym_name[sym_ref_by] == "text"
+                    # or references from data to rodata
+                    or (
+                        section_by_sym_name[sym_ref_by] == "data"
+                        and section_by_subsegment_type[sym.subsegment_type] == "rodata"
+                    )
+                )
+            ):
+                try:
+                    color = color_by_subsegment[sym_by_name[sym_ref_by].subsegment]
+                except KeyError:
+                    color = "black"
+                gprint(f'"{sym_ref_by}" -> "{sym.name}"' f' [ color = "{color}" ]')
+
+gprint("}")
+```
+
+</details>
+
+This script takes as input the name of a splat segment, and produces a graph in dot language.
+Optionally, pass one or more section names with `--section` (for example `--section rodata`) to restrict the visualization to the text section plus the listed sections.
+
+To render the script output, first save its output to a file, for example:
+
+```sh
+./graph_cross_sections_refs.py my_segment --section rodata > my_segment.dot
+```
+
+Then use graphviz to render it to svg (for example):
+
+```sh
+neato -Tsvg -O my_segment.dot
+```
+
+(on Ubuntu you can install graphviz with `apt install graphviz`)
+
+You can then open the `my_segment.dot.svg` file for viewing.
+
+The produced graph is laid out in columns: from left to right, the columns correspond to the text, data, rodata and bss sections. (note: if you passed e.g. `--section rodata` to the script, only text and rodata will be present)
+
+Symbols are further clustered by subsegments, indicated by black rectangles, and the name of the subsegment is indicated at the top of each cluster.
+Each text subsegment is colored differently.
+
+A suggested workflow based on this visualization is:
+
+0. Pick a segment of interest
+1. Run `splat split`
+2. Generate and render a graph
+3. Refine the segment's subsegments splits and pairing
+4. Iterate from step 1 again until satisfied
diff --git a/docs/Configuration.md b/docs/Configuration.md
@@ -1,5 +1,43 @@
 Splat has various options for configuration, all of which are listed under the `options` section of the yaml file.
 
+## Debug / logging
+
+### dump_symbols
+
+Whether to write a symbols dump after splitting.
+
+The output is composed of csv files in a `.splat` folder relative to the [`base_path`](#base_path).
+
+The columns of the csv files may change at any point. If you write a tool depending on this output, it is in particular recommended not to make assumptions about the order or number of columns (typically, new columns may be added). For example with Python, using `csv.DictReader` is recommended.
+
+#### Usage
+
+```yaml
+dump_symbols: true
+```
+
+#### Default
+
+`false`
+
+### dump_symbols_references
+
+Whether to also include symbol references (which symbols reference which symbols) in the symbols dump.
+
+This option adds a `referenced_by` column to `.splat/splat_symbols.csv`. The value for the column is a `|`-separated list of symbol names that reference the symbol of the current csv line. An empty value indicates no reference to that symbol was found.
+
+This option is only useful in conjunction with [`dump_symbols`](#dump_symbols) being `true` and does nothing otherwise.
+
+#### Usage
+
+```yaml
+dump_symbols_references: true
+```
+
+#### Default
+
+`false`
+
 ## Project configuration
 
 ### base_path

diff --git a/src/splat/scripts/split.py b/src/splat/scripts/split.py
@@ -523,8 +523,12 @@ def dump_symbols() -> None:
 
     with open(splat_hidden_folder / "splat_symbols.csv", "w", encoding="utf-8") as f:
         f.write(
-            "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract\n"
+            "vram_start,given_name,name,type,given_size,size,rom,defined,user_declared,referenced,extract"
+            ",segment,subsegment,subsegment_type"
         )
+        if options.opts.dump_symbols_references:
+            f.write(",referenced_by")
+        f.write("\n")
         for s in sorted(symbols.all_symbols, key=lambda x: x.vram_start):
             f.write(f"{s.vram_start:X},{s.given_name},{s.name},{s.type},")
             if s.given_size is not None:
@@ -536,7 +540,29 @@ def dump_symbols() -> None:
                 f.write(f"0x{s.rom:X},")
             else:
                 f.write("None,")
-            f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}\n")
+            f.write(f"{s.defined},{s.user_declared},{s.referenced},{s.extract}")
+            if s.segment is not None:
+                f.write(f",{s.segment.name}")
+                subsegment = None
+                if hasattr(s.segment, "get_subsegment_for_ram"):
+                    subsegment = s.segment.get_subsegment_for_ram(s.vram_start)
+                if subsegment is not None:
+                    f.write(f",{subsegment.name},{subsegment.type}")
+                else:
+                    f.write(",None,None")
+            else:
+                f.write(",None,None,None")
+            if options.opts.dump_symbols_references:
+                f.write(",")
+                cs = symbols.spim_context.globalSegment.getSymbol(s.vram_start)
+                if cs is not None:
+                    f.write(
+                        "|".join(
+                            _rcs.getName()
+                            for _rcs in cs.referenceSymbols | cs.referenceFunctions
+                        )
+                    )
+            f.write("\n")
 
     symbols.spim_context.saveContextToFile(splat_hidden_folder / "spim_context.csv")
 

diff --git a/src/splat/util/options.py b/src/splat/util/options.py
@@ -12,6 +12,7 @@ class SplatOpts:
     # Debug / logging
     verbose: bool
     dump_symbols: bool
+    dump_symbols_references: bool
     modes: List[str]
 
     # Project configuration
@@ -448,6 +449,7 @@ def parse_include_asm_macro_style() -> Literal["default", "maspsx_hack"]:
     ret = SplatOpts(
         verbose=verbose,
         dump_symbols=p.parse_opt("dump_symbols", bool, False),
+        dump_symbols_references=p.parse_opt("dump_symbols_references", bool, False),
         modes=modes,
         base_path=base_path,
         target_path=p.parse_path(base_path, "target_path"),