From f4cd9d2a9eb2b60d742dd1c0bb173387196b786d Mon Sep 17 00:00:00 2001
From: Jake Bromberg <jake@funlandresearch.com>
Date: Sat, 14 Feb 2026 10:07:27 -0800
Subject: [PATCH] fix: remove UNIQUE constraints that cause btree overflow on
 long artist names

The btree index backing UNIQUE (release_id, artist_name) overflows
PostgreSQL's 2,704-byte limit when artist_name exceeds ~900 bytes.
These constraints are redundant: import_csv.py deduplicates via
unique_key before COPY, and dedup_releases.py drops them anyway
during copy-swap.
---
 schema/create_database.sql       |  6 ++----
 tests/integration/test_schema.py | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/schema/create_database.sql b/schema/create_database.sql
index cf44ab0..4d9f86d 100644
--- a/schema/create_database.sql
+++ b/schema/create_database.sql
@@ -30,8 +30,7 @@ CREATE TABLE IF NOT EXISTS release (
 CREATE TABLE IF NOT EXISTS release_artist (
     release_id      integer NOT NULL REFERENCES release(id) ON DELETE CASCADE,
     artist_name     text NOT NULL,
-    extra           integer DEFAULT 0, -- 0 = main artist, 1 = extra credit
-    UNIQUE (release_id, artist_name)
+    extra           integer DEFAULT 0  -- 0 = main artist, 1 = extra credit
 );
 
 -- Tracks on releases
@@ -47,8 +46,7 @@ CREATE TABLE IF NOT EXISTS release_track (
 CREATE TABLE IF NOT EXISTS release_track_artist (
     release_id      integer NOT NULL REFERENCES release(id) ON DELETE CASCADE,
     track_sequence  integer NOT NULL,
-    artist_name     text NOT NULL,
-    UNIQUE (release_id, track_sequence, artist_name)
+    artist_name     text NOT NULL
 );
 
 -- ============================================
diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py
index 9074eaa..576a26c 100644
--- a/tests/integration/test_schema.py
+++ b/tests/integration/test_schema.py
@@ -115,6 +115,24 @@ def test_fk_constraints_with_cascade(self) -> None:
         }
         assert expected_fk_tables.issubset(fk_tables)
 
+    def test_no_unique_constraints_on_child_tables(self) -> None:
+        """Child tables must not have UNIQUE constraints (Python-level dedup handles this).
+
+        UNIQUE constraints on text columns cause btree overflow when artist_name
+        exceeds ~900 bytes. Dedup is handled by import_csv.py's unique_key filtering.
+        """
+        from contextlib import closing
+        # closing() guarantees conn.close() runs even when the catalog query raises.
+        with closing(self._connect()) as conn, conn.cursor() as cur:
+            cur.execute("""
+                SELECT tc.table_name, tc.constraint_name
+                FROM information_schema.table_constraints tc
+                WHERE tc.constraint_type = 'UNIQUE'
+                  AND tc.table_name IN ('release_artist', 'release_track_artist')
+            """)
+            unique_constraints = cur.fetchall()
+        assert unique_constraints == [], f"Unexpected UNIQUE constraints: {unique_constraints}"
+
     def test_schema_is_idempotent(self) -> None:
         """Running the schema twice doesn't error (IF NOT EXISTS)."""
         conn = psycopg.connect(self.db_url, autocommit=True)