diff --git a/UPDATING.md b/UPDATING.md index 3d42f2b3d4e1..6bcb75136142 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -310,6 +310,55 @@ See `superset/mcp_service/PRODUCTION.md` for deployment guides. } ``` +### Composite primary keys on many-to-many association tables + +The eight M:N association tables listed below have been changed from a synthetic surrogate `id INTEGER PRIMARY KEY` to a composite `PRIMARY KEY (fk1, fk2)` on the two foreign-key columns. The `id` column is dropped, and the two tables that previously carried a redundant `UNIQUE (fk1, fk2)` constraint have that constraint removed (it is now subsumed by the composite primary key). + +**Affected tables and their composite-PK column pairs:** + +| Table | Composite PK | +|---|---| +| `dashboard_roles` | `(dashboard_id, role_id)` | +| `dashboard_slices` | `(dashboard_id, slice_id)` | +| `dashboard_user` | `(user_id, dashboard_id)` | +| `report_schedule_user` | `(user_id, report_schedule_id)` | +| `rls_filter_roles` | `(role_id, rls_filter_id)` | +| `rls_filter_tables` | `(table_id, rls_filter_id)` | +| `slice_user` | `(user_id, slice_id)` | +| `sqlatable_user` | `(user_id, table_id)` | + +**Impact on external readers:** Any BI tool, custom report, backup script, or external integration that references these tables by their old surrogate `id` column (e.g., `SELECT id FROM dashboard_slices WHERE …`, `WHERE dashboard_slices.id IN (…)`) will break. Update such queries to project or filter on the FK pair (`dashboard_id, slice_id`) instead. The FK columns themselves are unchanged. + +**Pre-flight inventory queries.** Before applying the upgrade, operators are encouraged to run the queries below against their database to assess what the migration will change. Two classes of pre-existing data are not preserved by the migration: duplicate `(fk1, fk2)` rows (the migration keeps `MIN(id)` and deletes the rest) and rows with `NULL` in either FK column (the migration deletes them, since FK columns are promoted to `NOT NULL` for the composite PK). Compliance- or audit-sensitive operators should also `\copy` (Postgres) or `SELECT … INTO OUTFILE` (MySQL) the affected rows for their own records before upgrading. + +```sql +-- Duplicate (fk1, fk2) pairs (the migration will keep MIN(id) per group, delete the rest) +SELECT dashboard_id, role_id, COUNT(*) FROM dashboard_roles GROUP BY dashboard_id, role_id HAVING COUNT(*) > 1; +SELECT dashboard_id, slice_id, COUNT(*) FROM dashboard_slices GROUP BY dashboard_id, slice_id HAVING COUNT(*) > 1; +SELECT user_id, dashboard_id, COUNT(*) FROM dashboard_user GROUP BY user_id, dashboard_id HAVING COUNT(*) > 1; +SELECT user_id, report_schedule_id, COUNT(*) FROM report_schedule_user GROUP BY user_id, report_schedule_id HAVING COUNT(*) > 1; +SELECT role_id, rls_filter_id, COUNT(*) FROM rls_filter_roles GROUP BY role_id, rls_filter_id HAVING COUNT(*) > 1; +SELECT table_id, rls_filter_id, COUNT(*) FROM rls_filter_tables GROUP BY table_id, rls_filter_id HAVING COUNT(*) > 1; +SELECT user_id, slice_id, COUNT(*) FROM slice_user GROUP BY user_id, slice_id HAVING COUNT(*) > 1; +SELECT user_id, table_id, COUNT(*) FROM sqlatable_user GROUP BY user_id, table_id HAVING COUNT(*) > 1; + +-- Rows with a NULL in either FK (the migration will delete these) +SELECT COUNT(*) FROM dashboard_roles WHERE dashboard_id IS NULL OR role_id IS NULL; +SELECT COUNT(*) FROM dashboard_slices WHERE dashboard_id IS NULL OR slice_id IS NULL; +SELECT COUNT(*) FROM dashboard_user WHERE user_id IS NULL OR dashboard_id IS NULL; +SELECT COUNT(*) FROM report_schedule_user WHERE user_id IS NULL OR report_schedule_id IS NULL; +SELECT COUNT(*) FROM rls_filter_roles WHERE role_id IS NULL OR rls_filter_id IS NULL; +SELECT COUNT(*) FROM rls_filter_tables WHERE table_id IS NULL OR rls_filter_id IS NULL; +SELECT COUNT(*) FROM slice_user WHERE user_id IS NULL OR slice_id IS NULL; +SELECT COUNT(*) FROM sqlatable_user WHERE user_id IS NULL OR table_id IS NULL; +``` + +**Restoring an old `pg_dump` (or equivalent) against the new schema.** A dump taken before the migration includes `INSERT` statements that populate the now-removed `id` column. Restoring such a dump against the post-migration schema will fail. The supported workaround is to dump only the schema and reference data, then re-create the M:N associations from application data after restore — for example with `pg_dump --exclude-table-data` (or per-table `--exclude-table-data=dashboard_slices` etc.) for the eight junction tables, restore the rest, then run a one-shot script that re-INSERTs `(fk1, fk2)` pairs derived from your application export. Operators who need to restore an old dump verbatim should restore against a pre-migration Superset and then re-run the upgrade. + +**Intentional downgrade asymmetry.** The migration's `downgrade()` restores the surrogate `id` column and (for `dashboard_slices` and `report_schedule_user`) the original `UNIQUE (fk1, fk2)` constraint, but it does **not** restore the original `NULL`-allowed state on the FK columns — they remain `NOT NULL`. This is intentional: under SQLAlchemy's `secondary=` semantics, a `NULL` in either FK column of a junction table is meaningless (it cannot participate in either side of the relationship). Operators downgrading are not expected to need this restored. The asymmetry is documented for completeness so that round-trip schema diffs are not mistaken for migration bugs. + +**Constraint-name divergence between upgrade and downgrade.** The composite primary key created on upgrade is named `pk_` (Alembic's default for `op.create_primary_key("pk_
", ...)`), while the surrogate `id` primary key restored on downgrade is named `
_pkey` (PostgreSQL's default convention for `PrimaryKeyConstraint("id")`). The two names alternate so that a round-trip (upgrade → downgrade → upgrade) does not collide on a pre-existing constraint name. Operators using schema-comparison tools (e.g. `pg_diff`, `migra`) against a downgraded database may see this as drift versus a fresh-install schema. It is cosmetic — no application code references either constraint name. + ## 6.0.0 - [33055](https://github.com/apache/superset/pull/33055): Upgrades Flask-AppBuilder to 5.0.0. The AUTH_OID authentication type has been deprecated and is no longer available as an option in Flask-AppBuilder. OpenID (OID) is considered a deprecated authentication protocol - if you are using AUTH_OID, you will need to migrate to an alternative authentication method such as OAuth, LDAP, or database authentication before upgrading. - [34871](https://github.com/apache/superset/pull/34871): Fixed Jest test hanging issue from Ant Design v5 upgrade. MessageChannel is now mocked in test environment to prevent rc-overflow from causing Jest to hang. Test environment only - no production impact. diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6fb132e16137..b080b64aff49 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1212,9 +1212,18 @@ def data(self) -> dict[str, Any]: sqlatable_user = DBTable( "sqlatable_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("table_id", Integer, ForeignKey("tables.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "table_id", + Integer, + ForeignKey("tables.id", ondelete="CASCADE"), + primary_key=True, + ), ) @@ -2146,17 +2155,25 @@ def text(self, clause: str) -> TextClause: RLSFilterRoles = DBTable( "rls_filter_roles", metadata, - Column("id", Integer, primary_key=True), - Column("role_id", Integer, ForeignKey("ab_role.id"), nullable=False), - Column("rls_filter_id", Integer, ForeignKey("row_level_security_filters.id")), + Column("role_id", Integer, ForeignKey("ab_role.id"), primary_key=True), + Column( + "rls_filter_id", + Integer, + ForeignKey("row_level_security_filters.id"), + primary_key=True, + ), ) RLSFilterTables = DBTable( "rls_filter_tables", metadata, - Column("id", Integer, primary_key=True), - Column("table_id", Integer, ForeignKey("tables.id")), - Column("rls_filter_id", Integer, ForeignKey("row_level_security_filters.id")), + Column("table_id", Integer, ForeignKey("tables.id"), primary_key=True), + Column( + "rls_filter_id", + Integer, + ForeignKey("row_level_security_filters.id"), + primary_key=True, + ), ) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py new file mode 100644 index 000000000000..055ecd3c9700 --- /dev/null +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -0,0 +1,448 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""composite_pk_association_tables + +Replace the unused synthetic ``id INTEGER PRIMARY KEY`` on eight many-to-many +association tables with a composite primary key on the two FK columns. Drops +the now-redundant ``UniqueConstraint(fk1, fk2)`` on the two tables that +already carry one. Pre-flight: deletes rows with NULL FK values (six tables +allow them today) and any duplicate ``(fk1, fk2)`` rows. + +Motivated by SQLAlchemy-Continuum issue #129 (M2M restore against junction +tables with surrogate PKs); also closes the data-integrity hole where six +of the eight tables lacked DB-level uniqueness. + +Revision ID: 2bee73611e32 +Revises: 33d7e0e21daa +Create Date: 2026-05-01 23:36:34.050058 + +""" + +import logging +from typing import NamedTuple + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import inspect +from sqlalchemy.engine import Connection + +# revision identifiers, used by Alembic. +revision = "2bee73611e32" +down_revision = "33d7e0e21daa" + +logger = logging.getLogger("alembic.env") + + +class AssociationTable(NamedTuple): + """A junction table being converted from surrogate-id PK to composite-FK PK.""" + + name: str + fk1: str + fk2: str + + +# Order is alphabetical by table name; deterministic for review and bisection. +AFFECTED_TABLES: list[AssociationTable] = [ + AssociationTable("dashboard_roles", "dashboard_id", "role_id"), + AssociationTable("dashboard_slices", "dashboard_id", "slice_id"), + AssociationTable("dashboard_user", "user_id", "dashboard_id"), + AssociationTable("report_schedule_user", "user_id", "report_schedule_id"), + AssociationTable("rls_filter_roles", "role_id", "rls_filter_id"), + AssociationTable("rls_filter_tables", "table_id", "rls_filter_id"), + AssociationTable("slice_user", "user_id", "slice_id"), + AssociationTable("sqlatable_user", "user_id", "table_id"), +] + +# These two tables already declare ``UniqueConstraint(fk1, fk2)`` in the model; +# the composite PK subsumes it, so the migration drops the redundant constraint. +TABLES_WITH_PRE_EXISTING_UNIQUE: set[str] = { + "dashboard_slices", + "report_schedule_user", +} + +# Documentation set: tables whose FK columns are nullable in their original +# create_table migrations (``dashboard_roles.dashboard_id`` from revision +# e11ccdd12658 is the most recent addition). ``report_schedule_user`` is the +# only affected table created with both FK columns ``NOT NULL`` and is +# intentionally absent here. This set is no longer consulted at runtime — the +# upgrade now runs the NULL-FK cleanup on every affected table because the +# DELETE is a cheap no-op when the columns are already NOT NULL, and that +# eliminates the risk of bugs from this set going stale (the +# ``dashboard_roles`` omission caught in PR review was exactly that bug). +TABLES_WITH_NULLABLE_FKS: set[str] = { + "dashboard_roles", + "dashboard_slices", + "dashboard_user", + "rls_filter_roles", + "rls_filter_tables", + "slice_user", + "sqlatable_user", +} + + +def _check_no_external_fks_to_id(conn: Connection) -> None: + """Raise ``RuntimeError`` if any foreign key in the database references one + of the eight junction-table ``id`` columns. Uses SQLAlchemy's ``Inspector`` + for dialect-agnostic introspection across PostgreSQL, MySQL, and SQLite. + + Scope limitation: ``Inspector.get_table_names()`` returns tables in the + connection's default schema only. On PostgreSQL deployments where Superset + metadata lives in a non-default schema, or on multi-schema deployments + that allow cross-schema FKs, an external FK in another schema would not + be detected. This is acceptable for the standard single-schema + deployment that Superset documents; operators with multi-schema + metadata should run the equivalent inventory query against + ``information_schema.referential_constraints`` themselves before + applying. + """ + affected = {t.name for t in AFFECTED_TABLES} + insp = inspect(conn) + for table_name in insp.get_table_names(): + if table_name in affected: + continue + for fk in insp.get_foreign_keys(table_name): + if fk["referred_table"] in affected and "id" in fk["referred_columns"]: + raise RuntimeError( + f"Cannot drop synthetic id from {fk['referred_table']}: " + f"external FK {fk.get('name', '')} on {table_name} " + f"references {fk['referred_table']}({fk['referred_columns']}). " + "Drop or migrate the referencing FK before applying this " + "migration." + ) + + +def _table_clause(t: AssociationTable) -> sa.sql.expression.TableClause: + """Build a lightweight SQLAlchemy ``TableClause`` for ``t`` exposing the + columns the helper queries reference (``id``, ``fk1``, ``fk2``). Used so + that the dedupe / cleanup / assert SQL can be expressed via SQLAlchemy + core constructs rather than via string interpolation.""" + return sa.table(t.name, sa.column("id"), sa.column(t.fk1), sa.column(t.fk2)) + + +def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: + """Delete rows where ``t.fk1`` or ``t.fk2`` is NULL on ``t.name``. + + Returns the deletion count. Required because primary-key columns must be + NOT NULL; the PK-add downstream would fail with a cryptic constraint + violation if any NULL-FK rows survived. Run unconditionally on every + affected table — see ``TABLES_WITH_NULLABLE_FKS`` above for the rationale. + """ + tbl = _table_clause(t) + stmt = sa.delete(tbl).where(sa.or_(tbl.c[t.fk1].is_(None), tbl.c[t.fk2].is_(None))) + result = conn.execute(stmt) + n = result.rowcount or 0 + if n: + logger.warning( + "Deleted %d row(s) with NULL FK from %s before composite-PK promotion", + n, + t.name, + ) + return n + + +def _dedupe_by_min_id(conn: Connection, t: AssociationTable) -> int: + """Delete duplicate ``(t.fk1, t.fk2)`` rows from ``t.name`` keeping ``MIN(id)``. + + Returns the deletion count. The ``NOT IN`` argument is wrapped in an + extra ``SELECT keep_id FROM (...) AS s`` derived table because MySQL + rejects ``DELETE FROM t WHERE id NOT IN (SELECT MIN(id) FROM t GROUP BY + ...)`` with ERROR 1093 unless the inner SELECT is materialized through + a derived table. SQLAlchemy's ``.subquery()`` produces that wrap. + + Logs a sample (up to 10) of the discarded ``(fk1, fk2, id)`` tuples at + WARN before deletion, so operators can audit which rows are dropped — + the "keep ``MIN(id)``" policy preserves the original row, which is + correct in practice but discards any later, semantically-identical + re-grants. + """ + tbl = _table_clause(t) + + keep_min = ( + sa.select(sa.func.min(tbl.c.id).label("keep_id")) + .group_by(tbl.c[t.fk1], tbl.c[t.fk2]) + .subquery("keep_min") + ) + keep_ids = sa.select(keep_min.c.keep_id) + discarded = tbl.c.id.notin_(keep_ids) + + sample_stmt = ( + sa.select(tbl.c[t.fk1], tbl.c[t.fk2], tbl.c.id).where(discarded).limit(10) + ) + sample = list(conn.execute(sample_stmt)) + + delete_stmt = sa.delete(tbl).where(discarded) + result = conn.execute(delete_stmt) + n = result.rowcount or 0 + if n: + logger.warning( + "Deduped %d duplicate row(s) from %s; sample of discarded " + "(%s, %s, id) tuples (up to 10): %s", + n, + t.name, + t.fk1, + t.fk2, + sample, + ) + return n + + +def _assert_no_duplicates(conn: Connection, t: AssociationTable) -> None: + """Raise ``RuntimeError`` if any ``(t.fk1, t.fk2)`` duplicate group remains. + + Called after ``_dedupe_by_min_id`` to surface silent dialect-dependent + dedupe failures (e.g., a MySQL syntax issue) as an actionable error + before the PK-add fires with a less-helpful constraint-violation message. + """ + tbl = _table_clause(t) + duplicate_groups = ( + sa.select(sa.literal(1)) + .select_from(tbl) + .group_by(tbl.c[t.fk1], tbl.c[t.fk2]) + .having(sa.func.count() > 1) + .subquery("duplicate_groups") + ) + count_stmt = sa.select(sa.func.count()).select_from(duplicate_groups) + if remaining := conn.scalar(count_stmt) or 0: + raise RuntimeError( + f"Dedupe failed for {t.name}: {remaining} duplicate " + f"({t.fk1}, {t.fk2}) groups remain after _dedupe_by_min_id. " + f"Check the dedupe SQL for dialect {conn.dialect.name}." + ) + + +def _build_pre_upgrade_table( + insp: sa.engine.reflection.Inspector, t: AssociationTable +) -> sa.Table: + """Build a ``Table`` object representing the pre-upgrade schema of ``t``, + explicitly *without* any redundant ``UniqueConstraint(t.fk1, t.fk2)``. + Used as ``copy_from`` to ``batch_alter_table`` so the rebuilt table + omits the unnamed UNIQUE constraint deterministically across dialects + (SQLite reflects unnamed UNIQUEs with ``name=None``, defeating the + standard ``batch_op.drop_constraint(name)`` path). + + Reflects column types and FK targets (with original FK constraint names + preserved) from the live database; only the redundant UNIQUE is omitted. + """ + md = sa.MetaData() + fks_for_col: dict[str, list[dict]] = {} + for fk in insp.get_foreign_keys(t.name): + for col_name in fk["constrained_columns"]: + fks_for_col.setdefault(col_name, []).append(fk) + + cols: list[sa.Column] = [] + for c in insp.get_columns(t.name): + col_kwargs = {"nullable": c.get("nullable", True)} + if c["name"] == "id": + col_kwargs["primary_key"] = True + col_kwargs["autoincrement"] = True + fk_args = [] + for fk in fks_for_col.get(c["name"], []): + idx = fk["constrained_columns"].index(c["name"]) + target = f"{fk['referred_table']}.{fk['referred_columns'][idx]}" + options = {} + if fk.get("options", {}).get("ondelete"): + options["ondelete"] = fk["options"]["ondelete"] + if fk.get("name"): + options["name"] = fk["name"] + fk_args.append(sa.ForeignKey(target, **options)) + cols.append(sa.Column(c["name"], c["type"], *fk_args, **col_kwargs)) + return sa.Table(t.name, md, *cols) + + +def upgrade() -> None: + conn = op.get_bind() + _check_no_external_fks_to_id(conn) + insp = inspect(conn) + + for t in AFFECTED_TABLES: + # Run NULL-FK cleanup unconditionally: it is a no-op DELETE on tables + # whose FK columns are already NOT NULL (cheap), and skipping it on a + # table whose FK was nullable would leave the PK-add to fail with a + # cryptic constraint violation. Cf. ``TABLES_WITH_NULLABLE_FKS`` above + # for documentation of which tables are known to have nullable FKs. + _delete_null_fk_rows(conn, t) + _dedupe_by_min_id(conn, t) + _assert_no_duplicates(conn, t) + + # For the two tables with a pre-existing redundant UNIQUE + # (``dashboard_slices``, ``report_schedule_user``) build an explicit + # ``copy_from`` Table that omits the UNIQUE; this deterministically + # drops it across all dialects, including SQLite where unnamed + # constraints reflect with ``name=None`` and can't be dropped by + # name. For the other six tables, reflection-based default + # ``batch_alter_table`` (auto-detect) is fine since there's no + # UNIQUE to drop. On PostgreSQL/MySQL, direct ALTER avoids the + # temp-table index-name collision; on SQLite, the auto-detect picks + # ``recreate=True`` because PK changes need it. + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + # MySQL ERROR 1826: foreign-key constraint names are unique + # per-database, not per-table. ``recreate="always"`` builds + # ``_alembic_tmp_
`` with the original FK names from + # ``copy_from``, but the original table still holds those + # names until it's dropped, which fails on MySQL with + # ``Duplicate foreign key constraint name``. PostgreSQL and + # SQLite scope FK names per-table, so the recreate path + # works there as-is. Drop the original FKs by name first + # on MySQL; ``copy_from`` re-creates them on the rebuilt + # table with their original names. + if conn.dialect.name == "mysql": + for fk in insp.get_foreign_keys(t.name): + if fk_name := fk.get("name"): + op.drop_constraint(fk_name, t.name, type_="foreignkey") + with op.batch_alter_table( + t.name, + recreate="always", + copy_from=_build_pre_upgrade_table(insp, t), + ) as batch_op: + batch_op.drop_column("id") + batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + # SQLite quirk: composite PRIMARY KEY does not promote the + # constituent columns to NOT NULL (only ``INTEGER PRIMARY + # KEY`` does). PostgreSQL and MySQL implicitly promote the + # PK columns to NOT NULL when the constraint is added, + # so the explicit ``alter_column`` is a no-op on those + # backends but enforces the post-upgrade contract on + # SQLite. Without it, ``INSERT (NULL, 5)`` would succeed + # on SQLite despite the columns being part of the PK. + batch_op.alter_column(t.fk1, existing_type=sa.Integer, nullable=False) + batch_op.alter_column(t.fk2, existing_type=sa.Integer, nullable=False) + else: + with op.batch_alter_table(t.name) as batch_op: + batch_op.drop_column("id") + batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + # See comment above re: SQLite composite-PK NOT NULL quirk. + batch_op.alter_column(t.fk1, existing_type=sa.Integer, nullable=False) + batch_op.alter_column(t.fk2, existing_type=sa.Integer, nullable=False) + + +def downgrade() -> None: + # Inverse order: undo upgrade transformations from last-applied to + # first-applied. Within each table, drop the composite PK, restore the + # surrogate ``id`` column, and re-add the original ``UNIQUE`` constraint + # on the two tables that previously carried one. + # + # Note: FK columns remain NOT NULL after downgrade (intentional asymmetry + # — see UPDATING.md). Restoring the original nullable state would require + # an explicit ``alter_column`` per FK per table for no operator value; + # junction-table NULL FKs were always meaningless under ``secondary=`` + # semantics. + # The downgrade names the restored PK ``
_pkey`` (matching Postgres' + # default constraint-naming convention, which was the original constraint + # name before this migration ran) so a downgrade-then-upgrade round-trip + # doesn't collide on the upgrade's ``pk_
`` name. + # + # Adding a NOT NULL ``id`` column to a table with existing rows requires + # a default that fires on the existing rows. ``sa.Identity()`` (Postgres + # 10+ / MySQL 8+) and ``sa.Sequence`` (with explicit nextval) both + # backfill existing rows during ALTER TABLE; bare ``autoincrement=True`` + # does not. ``Identity`` is the modern portable choice. + conn = op.get_bind() + insp = inspect(conn) + is_mysql = conn.dialect.name == "mysql" + for t in reversed(AFFECTED_TABLES): + if is_mysql: + _downgrade_mysql_table(insp, t) + else: + with op.batch_alter_table(t.name) as batch_op: + batch_op.drop_constraint(f"pk_{t.name}", type_="primary") + batch_op.add_column( + sa.Column( + "id", + sa.Integer, + sa.Identity(always=False), + nullable=False, + ) + ) + batch_op.create_primary_key(f"{t.name}_pkey", ["id"]) + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + batch_op.create_unique_constraint( + f"uq_{t.name}_{t.fk1}_{t.fk2}", [t.fk1, t.fk2] + ) + + +def _downgrade_mysql_table( + insp: sa.engine.reflection.Inspector, t: AssociationTable +) -> None: + """MySQL-specific downgrade for one table. + + Two MySQL quirks force a dialect-specific path here: + + 1. **ERROR 1553 — ``Cannot drop index 'PRIMARY': needed in a foreign + key constraint``**. InnoDB uses the composite PK index to back the + FK on the leftmost column. Dropping the PK before the FKs orphans + that backing index. PostgreSQL and SQLite create separate indexes + for FK columns and don't need this dance. We drop the FKs first + and re-add them after the structural change. + + 2. **``Identity(always=False)`` on a non-PK column add does not emit + ``AUTO_INCREMENT`` on MySQL.** SQLAlchemy 1.4 only emits + ``AUTO_INCREMENT`` when the column has both ``Identity()`` and + ``primary_key=True`` at create time. Our portable path adds the + column first, then creates the PK separately — which works on + Postgres (the column gets ``GENERATED BY DEFAULT AS IDENTITY``) + and SQLite (``INTEGER PRIMARY KEY`` becomes a rowid alias) but + leaves MySQL without auto-generation, so existing rows can't be + backfilled and future ``INSERT`` statements fail with + ``Field 'id' doesn't have a default value``. The combined + ``DROP PRIMARY KEY, ADD COLUMN AUTO_INCREMENT, ADD PRIMARY KEY`` + in a single ALTER statement is the canonical MySQL idiom: MySQL + backfills existing rows with sequential values and the column + remains auto-incrementing for future inserts. + + Raw SQL is unavoidable here — there is no SQLAlchemy core equivalent + for the combined-ALTER form, and the constitution allows raw SQL for + dialect-specific DDL with no programmatic equivalent (preferring + triple-quoted strings for legibility). + """ + fks = insp.get_foreign_keys(t.name) + + for fk in fks: + if fk_name := fk.get("name"): + op.execute(f"ALTER TABLE `{t.name}` DROP FOREIGN KEY `{fk_name}`") + + op.execute( + f""" + ALTER TABLE `{t.name}` + DROP PRIMARY KEY, + ADD COLUMN id INT NOT NULL AUTO_INCREMENT, + ADD PRIMARY KEY (id) + """ + ) + + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + op.execute( + f""" + ALTER TABLE `{t.name}` + ADD UNIQUE INDEX `uq_{t.name}_{t.fk1}_{t.fk2}` + (`{t.fk1}`, `{t.fk2}`) + """ + ) + + for fk in fks: + ondelete = fk.get("options", {}).get("ondelete") + ondelete_clause = f" ON DELETE {ondelete}" if ondelete else "" + local_cols = ", ".join(f"`{c}`" for c in fk["constrained_columns"]) + ref_cols = ", ".join(f"`{c}`" for c in fk["referred_columns"]) + op.execute( + f""" + ALTER TABLE `{t.name}` + ADD CONSTRAINT `{fk["name"]}` + FOREIGN KEY ({local_cols}) + REFERENCES `{fk["referred_table"]}` ({ref_cols}) + {ondelete_clause} + """ + ) diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index f38d801719ea..bb1711678b70 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -35,7 +35,6 @@ String, Table, Text, - UniqueConstraint, ) from sqlalchemy.engine.base import Connection from sqlalchemy.orm import relationship, subqueryload @@ -93,37 +92,53 @@ def copy_dashboard(_mapper: Mapper, _connection: Connection, target: Dashboard) dashboard_slices = Table( "dashboard_slices", metadata, - Column("id", Integer, primary_key=True), - Column("dashboard_id", Integer, ForeignKey("dashboards.id", ondelete="CASCADE")), - Column("slice_id", Integer, ForeignKey("slices.id", ondelete="CASCADE")), - UniqueConstraint("dashboard_id", "slice_id"), + Column( + "dashboard_id", + Integer, + ForeignKey("dashboards.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "slice_id", + Integer, + ForeignKey("slices.id", ondelete="CASCADE"), + primary_key=True, + ), ) dashboard_user = Table( "dashboard_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("dashboard_id", Integer, ForeignKey("dashboards.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "dashboard_id", + Integer, + ForeignKey("dashboards.id", ondelete="CASCADE"), + primary_key=True, + ), ) DashboardRoles = Table( "dashboard_roles", metadata, - Column("id", Integer, primary_key=True), Column( "dashboard_id", Integer, ForeignKey("dashboards.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), Column( "role_id", Integer, ForeignKey("ab_role.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), ) diff --git a/superset/models/slice.py b/superset/models/slice.py index 04c698ce95da..0012d082efb9 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -58,9 +58,18 @@ slice_user = Table( "slice_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("slice_id", Integer, ForeignKey("slices.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "slice_id", + Integer, + ForeignKey("slices.id", ondelete="CASCADE"), + primary_key=True, + ), ) logger = logging.getLogger(__name__) diff --git a/superset/reports/models.py b/superset/reports/models.py index f0abda8a9216..7564336ae11d 100644 --- a/superset/reports/models.py +++ b/superset/reports/models.py @@ -101,20 +101,18 @@ class ReportSourceFormat(StrEnum): report_schedule_user = Table( "report_schedule_user", metadata, - Column("id", Integer, primary_key=True), Column( "user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), Column( "report_schedule_id", Integer, ForeignKey("report_schedule.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), - UniqueConstraint("user_id", "report_schedule_id"), ) diff --git a/tests/integration_tests/migrations/composite_pk_association_tables__tests.py b/tests/integration_tests/migrations/composite_pk_association_tables__tests.py new file mode 100644 index 000000000000..52b1942bdb24 --- /dev/null +++ b/tests/integration_tests/migrations/composite_pk_association_tables__tests.py @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Schema-shape assertion tests for the composite-PK association-tables +migration (revision 2bee73611e32). + +Builds the pre-migration shape against an isolated in-memory SQLite engine, +runs the migration's ``upgrade()``, and asserts the resulting shape matches +the data-model.md "After" specification: no ``id`` column, composite PK on +the two FK columns, and no redundant ``UNIQUE(fk1, fk2)`` on the two tables +that previously carried one. + +Continuum-restore verification is OUT OF SCOPE; that work lives in the +versioning epic (sc-103156). Cross-backend verification (PostgreSQL, MySQL) +is handled by the CI matrix (T034a). +""" + +from importlib import import_module + +import pytest +import sqlalchemy as sa +from alembic.migration import MigrationContext +from alembic.operations import Operations +from sqlalchemy import inspect + +# Import the migration module under test. +_migration = import_module( + "superset.migrations.versions." + "2026-05-01_23-36_2bee73611e32_composite_pk_association_tables" +) +AFFECTED_TABLES = _migration.AFFECTED_TABLES +TABLES_WITH_PRE_EXISTING_UNIQUE = _migration.TABLES_WITH_PRE_EXISTING_UNIQUE + + +@pytest.fixture(scope="module") +def post_upgrade_engine() -> sa.engine.Engine: + """An isolated in-memory SQLite engine with the migration applied to a + pre-migration-shaped seed schema. Used by the post-upgrade assertions + below. Module-scoped so the upgrade only runs once per test session.""" + engine = sa.create_engine("sqlite:///:memory:") + md = sa.MetaData() + for t in AFFECTED_TABLES: + cols: list[sa.SchemaItem] = [ + sa.Column("id", sa.Integer, primary_key=True), + sa.Column(t.fk1, sa.Integer, nullable=False), + sa.Column(t.fk2, sa.Integer, nullable=False), + ] + constraints: list[sa.SchemaItem] = [] + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + constraints.append(sa.UniqueConstraint(t.fk1, t.fk2)) + sa.Table(t.name, md, *cols, *constraints) + md.create_all(engine) + + # Apply the migration's upgrade() against this engine via Alembic's + # MigrationContext, patching the migration module's ``op`` reference. + with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + ops = Operations(ctx) + original_op = _migration.op + _migration.op = ops # type: ignore[attr-defined] + try: + _migration.upgrade() + finally: + _migration.op = original_op # type: ignore[attr-defined] + return engine + + +@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_no_id_column(post_upgrade_engine: sa.engine.Engine, t) -> None: + """The synthetic ``id`` column is gone from each affected table.""" + insp = inspect(post_upgrade_engine) + column_names = {c["name"] for c in insp.get_columns(t.name)} + assert "id" not in column_names, ( + f"{t.name} still has an 'id' column after migration; " + f"composite-PK conversion incomplete" + ) + + +@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_primary_key_is_composite_fks(post_upgrade_engine: sa.engine.Engine, t) -> None: + """The primary key of each affected table is exactly ``(fk1, fk2)``.""" + insp = inspect(post_upgrade_engine) + pk_cols = set(insp.get_pk_constraint(t.name).get("constrained_columns", [])) + assert pk_cols == {t.fk1, t.fk2}, ( + f"{t.name} primary key is {pk_cols}, expected {{{t.fk1}, {t.fk2}}}" + ) + + +@pytest.mark.parametrize( + "t", + [t for t in AFFECTED_TABLES if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE], + ids=lambda t: t.name, +) +def test_redundant_unique_dropped(post_upgrade_engine: sa.engine.Engine, t) -> None: + """For the two tables that previously carried a UNIQUE(fk1, fk2), that + constraint is now subsumed by the composite PK and must not appear + separately in the unique-constraint list.""" + insp = inspect(post_upgrade_engine) + redundant_pair = {t.fk1, t.fk2} + for uc in insp.get_unique_constraints(t.name): + cols = set(uc.get("column_names", [])) + assert cols != redundant_pair, ( + f"{t.name} still carries a redundant UniqueConstraint over " + f"{redundant_pair} (name={uc.get('name')!r}); " + f"composite-PK conversion incomplete" + ) + + +@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_fk_columns_not_null(post_upgrade_engine: sa.engine.Engine, t) -> None: + """PK promotion implicitly tightens the FK columns to NOT NULL.""" + insp = inspect(post_upgrade_engine) + cols_by_name = {c["name"]: c for c in insp.get_columns(t.name)} + for col in (t.fk1, t.fk2): + assert col in cols_by_name, f"{t.name} missing column {col}" + assert cols_by_name[col].get("nullable") is False, ( + f"{t.name}.{col} is nullable; expected NOT NULL after PK promotion" + ) diff --git a/tests/integration_tests/migrations/composite_pk_round_trip__tests.py b/tests/integration_tests/migrations/composite_pk_round_trip__tests.py new file mode 100644 index 000000000000..d83c9d113c3f --- /dev/null +++ b/tests/integration_tests/migrations/composite_pk_round_trip__tests.py @@ -0,0 +1,168 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Schema round-trip tests for the composite-PK association-tables migration +(revision 2bee73611e32). Builds the pre-migration shape against an in-memory +SQLite engine, runs the migration's ``upgrade()``, asserts the post-upgrade +shape, runs ``downgrade()``, asserts the prior shape is restored (modulo the +documented FK NOT NULL asymmetry), and re-runs ``upgrade()`` to verify +idempotency. + +This is run against an isolated in-memory engine via Alembic's +``MigrationContext`` so the test does not perturb the project's test DB. + +Cross-backend verification of the same migration against PostgreSQL and +MySQL is delegated to the CI matrix (see T034a in tasks.md) and to the +quickstart.md verification (T033). This file covers the SQLite slice. +""" + +from importlib import import_module +from typing import Any + +import pytest +import sqlalchemy as sa +from alembic.migration import MigrationContext +from alembic.operations import Operations +from sqlalchemy import inspect + +# Import the migration module under test. +_migration = import_module( + "superset.migrations.versions." + "2026-05-01_23-36_2bee73611e32_composite_pk_association_tables" +) +AFFECTED_TABLES = _migration.AFFECTED_TABLES +TABLES_WITH_PRE_EXISTING_UNIQUE = _migration.TABLES_WITH_PRE_EXISTING_UNIQUE + + +def _build_pre_migration_schema(engine: sa.engine.Engine) -> None: + """Recreate the eight tables in their pre-migration shape (surrogate + ``id INTEGER PRIMARY KEY`` plus an optional ``UNIQUE(fk1, fk2)`` on the + two tables that previously carried one). FKs to parent tables are + omitted to keep the test self-contained — we're testing schema + transformations, not FK enforcement.""" + md = sa.MetaData() + for t in AFFECTED_TABLES: + cols: list[sa.Column] = [ + sa.Column("id", sa.Integer, primary_key=True), + sa.Column(t.fk1, sa.Integer, nullable=False), + sa.Column(t.fk2, sa.Integer, nullable=False), + ] + constraints: list[sa.SchemaItem] = [] + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + constraints.append(sa.UniqueConstraint(t.fk1, t.fk2)) + sa.Table(t.name, md, *cols, *constraints) + md.create_all(engine) + + +def _shape(engine: sa.engine.Engine, table: str) -> dict[str, Any]: + """Return a structural summary for asserting equality across runs.""" + insp = inspect(engine) + pk = insp.get_pk_constraint(table).get("constrained_columns", []) + columns = sorted(c["name"] for c in insp.get_columns(table)) + uniques = sorted( + tuple(sorted(uc.get("column_names", []))) + for uc in insp.get_unique_constraints(table) + ) + return {"columns": columns, "pk": sorted(pk), "uniques": uniques} + + +def _run_with_alembic_context(engine: sa.engine.Engine, fn) -> None: + """Run ``fn()`` (the migration's upgrade/downgrade body) inside a fresh + Alembic ``MigrationContext`` bound to ``engine``. Patches the + migration module's ``op`` to point at this context so its + ``op.get_bind()`` and ``op.batch_alter_table`` calls execute against + the in-memory engine.""" + with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + ops = Operations(ctx) + original_op = _migration.op + _migration.op = ops # type: ignore[attr-defined] + try: + fn() + finally: + _migration.op = original_op # type: ignore[attr-defined] + + +def test_round_trip_against_in_memory_sqlite() -> None: + """Round-trip: pre-migration → upgrade → downgrade → upgrade again. + + Asserts: + - Post-upgrade shape: no ``id``, composite PK on (fk1, fk2), no + UNIQUE(fk1, fk2) on the two tables that previously carried one. + - Post-downgrade shape: ``id`` restored, PK back on (id), UNIQUE + re-added on the two tables. (FK columns remain NOT NULL — the + documented intentional asymmetry.) + - Post-re-upgrade idempotency: shape matches the first post-upgrade. + """ + engine = sa.create_engine("sqlite:///:memory:") + _build_pre_migration_schema(engine) + + pre_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + + _run_with_alembic_context(engine, _migration.upgrade) + + for t in AFFECTED_TABLES: + s = _shape(engine, t.name) + assert "id" not in s["columns"], f"{t.name}: id still present post-upgrade: {s}" + assert s["pk"] == sorted([t.fk1, t.fk2]), ( + f"{t.name}: PK is {s['pk']}, expected {sorted([t.fk1, t.fk2])}" + ) + assert tuple(sorted([t.fk1, t.fk2])) not in s["uniques"], ( + f"{t.name}: redundant UNIQUE not dropped post-upgrade: {s['uniques']}" + ) + + post_upgrade_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + + _run_with_alembic_context(engine, _migration.downgrade) + + for t in AFFECTED_TABLES: + s = _shape(engine, t.name) + assert "id" in s["columns"], f"{t.name}: id not restored post-downgrade: {s}" + assert s["pk"] == ["id"], f"{t.name}: PK is {s['pk']}, expected ['id']" + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + assert tuple(sorted([t.fk1, t.fk2])) in s["uniques"], ( + f"{t.name}: UNIQUE not restored post-downgrade: {s['uniques']}" + ) + + _run_with_alembic_context(engine, _migration.upgrade) + + re_upgrade_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + assert re_upgrade_shape == post_upgrade_shape, ( + "Re-upgrade shape differs from initial upgrade shape — " + "migration is not idempotent. " + f"diff: {set(re_upgrade_shape.items()) ^ set(post_upgrade_shape.items())}" + ) + + # Use pre_shape only to demonstrate it was captured (not asserted against + # because the round-trip downgrade intentionally diverges on FK NOT NULL). + _ = pre_shape + + +def test_migration_module_constants_are_consistent() -> None: + """Sanity-check the migration module's exported constants. Catches + accidental edits that misalign AFFECTED_TABLES with the auxiliary sets.""" + affected_names = {t.name for t in AFFECTED_TABLES} + assert _migration.TABLES_WITH_PRE_EXISTING_UNIQUE.issubset(affected_names) + assert _migration.TABLES_WITH_NULLABLE_FKS.issubset(affected_names) + # Order is alphabetical (deterministic for review/bisection). + assert [t.name for t in AFFECTED_TABLES] == sorted(affected_names) + + +@pytest.mark.skipif(True, reason="placeholder — see test_round_trip above") +def test_placeholder_for_future_postgres_round_trip() -> None: + """Reserved slot for a future Postgres-specific round-trip if local + SQLite divergence ever needs to be cross-checked against the real + backend. Today's CI matrix (T034a) handles this implicitly.""" diff --git a/tests/unit_tests/migrations/composite_pk_association_tables_test.py b/tests/unit_tests/migrations/composite_pk_association_tables_test.py new file mode 100644 index 000000000000..05a69293a23b --- /dev/null +++ b/tests/unit_tests/migrations/composite_pk_association_tables_test.py @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for the composite-PK association-tables migration (revision +2bee73611e32). Verifies the post-migration constraint enforcement: duplicate +``(fk1, fk2)`` insertions fail with IntegrityError, distinct pairs succeed. + +Schema is built from the live ORM ``Table`` definitions via +``metadata.create_all(engine)`` against in-memory SQLite. This reflects the +post-T015–T018 ORM model state (composite-PK), independent of whether the +Alembic migration has run against the test DB. The two should agree. +""" + +import pytest +import sqlalchemy as sa +from sqlalchemy.exc import IntegrityError + +# (table_name, fk1_col, fk2_col, fk1_parent_table, fk2_parent_table) +# Parent-table names are needed to build the FK targets in the in-memory schema. +AFFECTED_TABLES = [ + ("dashboard_roles", "dashboard_id", "role_id", "dashboards", "ab_role"), + ("dashboard_slices", "dashboard_id", "slice_id", "dashboards", "slices"), + ("dashboard_user", "user_id", "dashboard_id", "ab_user", "dashboards"), + ( + "report_schedule_user", + "user_id", + "report_schedule_id", + "ab_user", + "report_schedule", + ), + ( + "rls_filter_roles", + "role_id", + "rls_filter_id", + "ab_role", + "row_level_security_filters", + ), + ( + "rls_filter_tables", + "table_id", + "rls_filter_id", + "tables", + "row_level_security_filters", + ), + ("slice_user", "user_id", "slice_id", "ab_user", "slices"), + ("sqlatable_user", "user_id", "table_id", "ab_user", "tables"), +] + + +def _build_in_memory_schema( + table_name: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> tuple[sa.engine.Engine, sa.Table]: + """Build an in-memory SQLite schema with two minimal parent tables and + the junction table under test (composite-PK shape). Returns the engine + and the junction-table object for inserts.""" + metadata = sa.MetaData() + sa.Table( + fk1_parent, + metadata, + sa.Column("id", sa.Integer, primary_key=True), + ) + if fk2_parent != fk1_parent: + sa.Table( + fk2_parent, + metadata, + sa.Column("id", sa.Integer, primary_key=True), + ) + junction = sa.Table( + table_name, + metadata, + sa.Column( + fk1, + sa.Integer, + sa.ForeignKey(f"{fk1_parent}.id"), + primary_key=True, + ), + sa.Column( + fk2, + sa.Integer, + sa.ForeignKey(f"{fk2_parent}.id"), + primary_key=True, + ), + ) + engine = sa.create_engine("sqlite:///:memory:") + metadata.create_all(engine) + # Seed parent rows so the FK constraints can be satisfied. + # Identifiers come from the AFFECTED_TABLES test parameter list, not user input. + with engine.begin() as conn: + conn.execute( + sa.text(f"INSERT INTO {fk1_parent} (id) VALUES (1), (2)") # noqa: S608 + ) + if fk2_parent != fk1_parent: + conn.execute( + sa.text(f"INSERT INTO {fk2_parent} (id) VALUES (1), (2)") # noqa: S608 + ) + return engine, junction + + +@pytest.mark.parametrize("table,fk1,fk2,fk1_parent,fk2_parent", AFFECTED_TABLES) +def test_duplicate_insert_rejected( + table: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> None: + """Inserting the same ``(fk1, fk2)`` pair twice raises ``IntegrityError``. + + Verifies SC-004 / FR-007 — the composite primary key enforces uniqueness + at the database level on every affected table. + """ + engine, junction = _build_in_memory_schema(table, fk1, fk2, fk1_parent, fk2_parent) + with engine.begin() as conn: + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + with pytest.raises(IntegrityError): + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + + +@pytest.mark.parametrize("table,fk1,fk2,fk1_parent,fk2_parent", AFFECTED_TABLES) +def test_distinct_pairs_accepted( + table: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> None: + """Two distinct ``(fk1, fk2)`` pairs both succeed. + + Sanity check that the PK isn't accidentally a single-column constraint + (which would reject ``(1, 1)`` and ``(1, 2)`` as a duplicate on column 1). + """ + engine, junction = _build_in_memory_schema(table, fk1, fk2, fk1_parent, fk2_parent) + with engine.begin() as conn: + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + conn.execute(junction.insert().values({fk1: 1, fk2: 2})) + result = conn.execute( + sa.text(f"SELECT COUNT(*) FROM {table}") # noqa: S608 + ).scalar_one() + assert result == 2