From f4cd9d2a9eb2b60d742dd1c0bb173387196b786d Mon Sep 17 00:00:00 2001
From: Jake Bromberg
Date: Sat, 14 Feb 2026 10:07:27 -0800
Subject: [PATCH] fix: remove UNIQUE constraints that cause btree overflow on long artist names

The btree index backing UNIQUE (release_id, artist_name) overflows
PostgreSQL's 2,704-byte limit when artist_name exceeds ~900 bytes. These
constraints are redundant: import_csv.py deduplicates via unique_key
before COPY, and dedup_releases.py drops them anyway during copy-swap.
---
 schema/create_database.sql       |  6 ++----
 tests/integration/test_schema.py | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/schema/create_database.sql b/schema/create_database.sql
index cf44ab0..4d9f86d 100644
--- a/schema/create_database.sql
+++ b/schema/create_database.sql
@@ -30,8 +30,7 @@ CREATE TABLE IF NOT EXISTS release (
 CREATE TABLE IF NOT EXISTS release_artist (
     release_id integer NOT NULL REFERENCES release(id) ON DELETE CASCADE,
     artist_name text NOT NULL,
-    extra integer DEFAULT 0, -- 0 = main artist, 1 = extra credit
-    UNIQUE (release_id, artist_name)
+    extra integer DEFAULT 0 -- 0 = main artist, 1 = extra credit
 );
 
 -- Tracks on releases
@@ -47,8 +46,7 @@ CREATE TABLE IF NOT EXISTS release_track (
 CREATE TABLE IF NOT EXISTS release_track_artist (
     release_id integer NOT NULL REFERENCES release(id) ON DELETE CASCADE,
     track_sequence integer NOT NULL,
-    artist_name text NOT NULL,
-    UNIQUE (release_id, track_sequence, artist_name)
+    artist_name text NOT NULL
 );
 
 -- ============================================
diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py
index 9074eaa..576a26c 100644
--- a/tests/integration/test_schema.py
+++ b/tests/integration/test_schema.py
@@ -115,6 +115,24 @@ def test_fk_constraints_with_cascade(self) -> None:
         }
         assert expected_fk_tables.issubset(fk_tables)
 
+    def test_no_unique_constraints_on_child_tables(self) -> None:
+        """Child tables must not have UNIQUE constraints (Python-level dedup handles this).
+
+        UNIQUE constraints on text columns cause btree overflow when artist_name
+        exceeds ~900 bytes. Dedup is handled by import_csv.py's unique_key filtering.
+        """
+        conn = self._connect()
+        with conn.cursor() as cur:
+            cur.execute("""
+                SELECT tc.table_name, tc.constraint_name
+                FROM information_schema.table_constraints tc
+                WHERE tc.constraint_type = 'UNIQUE'
+                  AND tc.table_name IN ('release_artist', 'release_track_artist')
+            """)
+            unique_constraints = cur.fetchall()
+        conn.close()
+        assert unique_constraints == [], f"Unexpected UNIQUE constraints: {unique_constraints}"
+
     def test_schema_is_idempotent(self) -> None:
         """Running the schema twice doesn't error (IF NOT EXISTS)."""
         conn = psycopg.connect(self.db_url, autocommit=True)