From 866db958c5b51836fdb33309c81989f8395a2947 Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Mon, 4 May 2026 09:54:25 -0600 Subject: [PATCH 1/8] refactor(db): composite PK on M2M association tables (sc-105349) Replace synthetic id INTEGER PRIMARY KEY with composite PRIMARY KEY (fk1, fk2) on the eight pure-junction tables: dashboard_roles, dashboard_slices, dashboard_user, report_schedule_user, rls_filter_roles, rls_filter_tables, slice_user, sqlatable_user. The redundant UNIQUE(fk1, fk2) on dashboard_slices and report_schedule_user is dropped (subsumed by the new PK). Migration handles dialect quirks: copy_from for tables with pre-existing UNIQUE (so SQLite's anonymous-constraint reflection doesn't matter), wrapped- subquery dedupe for MySQL (ERROR 1093), sa.Identity(always=False) on downgrade to backfill the restored id column without NOT NULL violations, and distinct PK names per direction (pk_{table} on upgrade,
{table}_pkey on downgrade) to avoid round-trip index-name collisions on Postgres. ORM Table() definitions updated to match. UPDATING.md entry added with operator runbook (BI-tool impact, pre-flight inventory queries, dedupe-row- loss notice, pg_dump workaround, FK-NOT-NULL downgrade asymmetry note). Tests: 8 schema-shape assertions (post-upgrade), 8 duplicate-rejection unit tests, 8 distinct-pair sanity tests, 1 round-trip + idempotency test (in-memory SQLite via Alembic MigrationContext). Continuum-restore verification against the new shape is out of scope for this PR; it is the responsibility of the versioning epic (sc-103156). Co-Authored-By: Claude Opus 4.7 (1M context) --- UPDATING.md | 47 +++ superset/connectors/sqla/models.py | 35 ++- ...3611e32_composite_pk_association_tables.py | 289 ++++++++++++++++++ superset/models/dashboard.py | 37 ++- superset/models/slice.py | 15 +- superset/reports/models.py | 6 +- .../composite_pk_association_tables__tests.py | 131 ++++++++ .../composite_pk_round_trip__tests.py | 168 ++++++++++ .../composite_pk_association_tables_test.py | 132 ++++++++ 9 files changed, 833 insertions(+), 27 deletions(-) create mode 100644 superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py create mode 100644 tests/integration_tests/migrations/composite_pk_association_tables__tests.py create mode 100644 tests/integration_tests/migrations/composite_pk_round_trip__tests.py create mode 100644 tests/unit_tests/migrations/composite_pk_association_tables_test.py diff --git a/UPDATING.md b/UPDATING.md index 3d42f2b3d4e1..1ece31a1c6fd 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -310,6 +310,53 @@ See `superset/mcp_service/PRODUCTION.md` for deployment guides. } ``` +### Composite primary keys on many-to-many association tables + +The eight M:N association tables listed below have been changed from a synthetic surrogate `id INTEGER PRIMARY KEY` to a composite `PRIMARY KEY (fk1, fk2)` on the two foreign-key columns. 
The `id` column is dropped, and the two tables that previously carried a redundant `UNIQUE (fk1, fk2)` constraint have that constraint removed (it is now subsumed by the composite primary key). + +**Affected tables and their composite-PK column pairs:** + +| Table | Composite PK | +|---|---| +| `dashboard_roles` | `(dashboard_id, role_id)` | +| `dashboard_slices` | `(dashboard_id, slice_id)` | +| `dashboard_user` | `(user_id, dashboard_id)` | +| `report_schedule_user` | `(user_id, report_schedule_id)` | +| `rls_filter_roles` | `(role_id, rls_filter_id)` | +| `rls_filter_tables` | `(table_id, rls_filter_id)` | +| `slice_user` | `(user_id, slice_id)` | +| `sqlatable_user` | `(user_id, table_id)` | + +**Impact on external readers:** Any BI tool, custom report, backup script, or external integration that references these tables by their old surrogate `id` column (e.g., `SELECT id FROM dashboard_slices WHERE …`, `WHERE dashboard_slices.id IN (…)`) will break. Update such queries to project or filter on the FK pair (`dashboard_id, slice_id`) instead. The FK columns themselves are unchanged. + +**Pre-flight inventory queries.** Before applying the upgrade, operators are encouraged to run the queries below against their database to assess what the migration will change. Two classes of pre-existing data are not preserved by the migration: duplicate `(fk1, fk2)` rows (the migration keeps `MIN(id)` and deletes the rest) and rows with `NULL` in either FK column (the migration deletes them, since FK columns are promoted to `NOT NULL` for the composite PK). Compliance- or audit-sensitive operators should also `\copy` (Postgres) or `SELECT … INTO OUTFILE` (MySQL) the affected rows for their own records before upgrading. 
+ +```sql +-- Duplicate (fk1, fk2) pairs (the migration will keep MIN(id) per group, delete the rest) +SELECT dashboard_id, role_id, COUNT(*) FROM dashboard_roles GROUP BY dashboard_id, role_id HAVING COUNT(*) > 1; +SELECT dashboard_id, slice_id, COUNT(*) FROM dashboard_slices GROUP BY dashboard_id, slice_id HAVING COUNT(*) > 1; +SELECT user_id, dashboard_id, COUNT(*) FROM dashboard_user GROUP BY user_id, dashboard_id HAVING COUNT(*) > 1; +SELECT user_id, report_schedule_id, COUNT(*) FROM report_schedule_user GROUP BY user_id, report_schedule_id HAVING COUNT(*) > 1; +SELECT role_id, rls_filter_id, COUNT(*) FROM rls_filter_roles GROUP BY role_id, rls_filter_id HAVING COUNT(*) > 1; +SELECT table_id, rls_filter_id, COUNT(*) FROM rls_filter_tables GROUP BY table_id, rls_filter_id HAVING COUNT(*) > 1; +SELECT user_id, slice_id, COUNT(*) FROM slice_user GROUP BY user_id, slice_id HAVING COUNT(*) > 1; +SELECT user_id, table_id, COUNT(*) FROM sqlatable_user GROUP BY user_id, table_id HAVING COUNT(*) > 1; + +-- Rows with a NULL in either FK (the migration will delete these) +SELECT COUNT(*) FROM dashboard_roles WHERE dashboard_id IS NULL OR role_id IS NULL; +SELECT COUNT(*) FROM dashboard_slices WHERE dashboard_id IS NULL OR slice_id IS NULL; +SELECT COUNT(*) FROM dashboard_user WHERE user_id IS NULL OR dashboard_id IS NULL; +SELECT COUNT(*) FROM report_schedule_user WHERE user_id IS NULL OR report_schedule_id IS NULL; +SELECT COUNT(*) FROM rls_filter_roles WHERE role_id IS NULL OR rls_filter_id IS NULL; +SELECT COUNT(*) FROM rls_filter_tables WHERE table_id IS NULL OR rls_filter_id IS NULL; +SELECT COUNT(*) FROM slice_user WHERE user_id IS NULL OR slice_id IS NULL; +SELECT COUNT(*) FROM sqlatable_user WHERE user_id IS NULL OR table_id IS NULL; +``` + +**Restoring an old `pg_dump` (or equivalent) against the new schema.** A dump taken before the migration includes `INSERT` statements that populate the now-removed `id` column. 
Restoring such a dump against the post-migration schema will fail. The supported workaround is to dump only the schema and reference data, then re-create the M:N associations from application data after restore — for example with `pg_dump --exclude-table-data` (or per-table `--exclude-table-data=dashboard_slices` etc.) for the eight junction tables, restore the rest, then run a one-shot script that re-INSERTs `(fk1, fk2)` pairs derived from your application export. Operators who need to restore an old dump verbatim should restore against a pre-migration Superset and then re-run the upgrade. + +**Intentional downgrade asymmetry.** The migration's `downgrade()` restores the surrogate `id` column and (for `dashboard_slices` and `report_schedule_user`) the original `UNIQUE (fk1, fk2)` constraint, but it does **not** restore the original `NULL`-allowed state on the FK columns — they remain `NOT NULL`. This is intentional: under SQLAlchemy's `secondary=` semantics, a `NULL` in either FK column of a junction table is meaningless (it cannot participate in either side of the relationship). Operators downgrading are not expected to need this restored. The asymmetry is documented for completeness so that round-trip schema diffs are not mistaken for migration bugs. + ## 6.0.0 - [33055](https://github.com/apache/superset/pull/33055): Upgrades Flask-AppBuilder to 5.0.0. The AUTH_OID authentication type has been deprecated and is no longer available as an option in Flask-AppBuilder. OpenID (OID) is considered a deprecated authentication protocol - if you are using AUTH_OID, you will need to migrate to an alternative authentication method such as OAuth, LDAP, or database authentication before upgrading. - [34871](https://github.com/apache/superset/pull/34871): Fixed Jest test hanging issue from Ant Design v5 upgrade. MessageChannel is now mocked in test environment to prevent rc-overflow from causing Jest to hang. Test environment only - no production impact. 
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6fb132e16137..b080b64aff49 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1212,9 +1212,18 @@ def data(self) -> dict[str, Any]: sqlatable_user = DBTable( "sqlatable_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("table_id", Integer, ForeignKey("tables.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "table_id", + Integer, + ForeignKey("tables.id", ondelete="CASCADE"), + primary_key=True, + ), ) @@ -2146,17 +2155,25 @@ def text(self, clause: str) -> TextClause: RLSFilterRoles = DBTable( "rls_filter_roles", metadata, - Column("id", Integer, primary_key=True), - Column("role_id", Integer, ForeignKey("ab_role.id"), nullable=False), - Column("rls_filter_id", Integer, ForeignKey("row_level_security_filters.id")), + Column("role_id", Integer, ForeignKey("ab_role.id"), primary_key=True), + Column( + "rls_filter_id", + Integer, + ForeignKey("row_level_security_filters.id"), + primary_key=True, + ), ) RLSFilterTables = DBTable( "rls_filter_tables", metadata, - Column("id", Integer, primary_key=True), - Column("table_id", Integer, ForeignKey("tables.id")), - Column("rls_filter_id", Integer, ForeignKey("row_level_security_filters.id")), + Column("table_id", Integer, ForeignKey("tables.id"), primary_key=True), + Column( + "rls_filter_id", + Integer, + ForeignKey("row_level_security_filters.id"), + primary_key=True, + ), ) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py new file mode 100644 index 000000000000..2c841bc6171a --- /dev/null +++ 
b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""composite_pk_association_tables + +Replace the unused synthetic ``id INTEGER PRIMARY KEY`` on eight many-to-many +association tables with a composite primary key on the two FK columns. Drops +the now-redundant ``UniqueConstraint(fk1, fk2)`` on the two tables that +already carry one. Pre-flight: deletes rows with NULL FK values (six tables +allow them today) and any duplicate ``(fk1, fk2)`` rows. + +Motivated by SQLAlchemy-Continuum issue #129 (M2M restore against junction +tables with surrogate PKs); also closes the data-integrity hole where six +of the eight tables lacked DB-level uniqueness. + +Revision ID: 2bee73611e32 +Revises: ce6bd21901ab +Create Date: 2026-05-01 23:36:34.050058 + +""" + +import logging +from typing import NamedTuple + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import inspect +from sqlalchemy.engine import Connection + +# revision identifiers, used by Alembic. 
+revision = "2bee73611e32" +down_revision = "ce6bd21901ab" + +logger = logging.getLogger("alembic.env") + + +class AssociationTable(NamedTuple): + """A junction table being converted from surrogate-id PK to composite-FK PK.""" + + name: str + fk1: str + fk2: str + + +# Order is alphabetical by table name; deterministic for review and bisection. +AFFECTED_TABLES: list[AssociationTable] = [ + AssociationTable("dashboard_roles", "dashboard_id", "role_id"), + AssociationTable("dashboard_slices", "dashboard_id", "slice_id"), + AssociationTable("dashboard_user", "user_id", "dashboard_id"), + AssociationTable("report_schedule_user", "user_id", "report_schedule_id"), + AssociationTable("rls_filter_roles", "role_id", "rls_filter_id"), + AssociationTable("rls_filter_tables", "table_id", "rls_filter_id"), + AssociationTable("slice_user", "user_id", "slice_id"), + AssociationTable("sqlatable_user", "user_id", "table_id"), +] + +# These two tables already declare ``UniqueConstraint(fk1, fk2)`` in the model; +# the composite PK subsumes it, so the migration drops the redundant constraint. +TABLES_WITH_PRE_EXISTING_UNIQUE: set[str] = { + "dashboard_slices", + "report_schedule_user", +} + +# Six tables whose FK columns are nullable today. Promoting an FK to a primary +# key column makes it NOT NULL, so any existing NULL-FK rows would block the +# PK-add. We delete them in pre-flight (a junction-table row with a NULL FK +# is meaningless under SQLAlchemy ``secondary=`` semantics anyway). +TABLES_WITH_NULLABLE_FKS: set[str] = { + "dashboard_slices", + "dashboard_user", + "rls_filter_roles", + "rls_filter_tables", + "slice_user", + "sqlatable_user", +} + + +def _check_no_external_fks_to_id(conn: Connection) -> None: + """Raise ``RuntimeError`` if any foreign key in the database references one + of the eight junction-table ``id`` columns. 
Uses SQLAlchemy's ``Inspector`` + for dialect-agnostic introspection across PostgreSQL, MySQL, and SQLite.""" + affected = {t.name for t in AFFECTED_TABLES} + insp = inspect(conn) + for table_name in insp.get_table_names(): + if table_name in affected: + continue + for fk in insp.get_foreign_keys(table_name): + if fk["referred_table"] in affected and "id" in fk["referred_columns"]: + raise RuntimeError( + f"Cannot drop synthetic id from {fk['referred_table']}: " + f"external FK {fk.get('name', '')} on {table_name} " + f"references {fk['referred_table']}({fk['referred_columns']}). " + f"Drop or migrate the referencing FK before applying this " + f"migration." + ) + + +def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: + """Delete rows where ``t.fk1`` or ``t.fk2`` is NULL on ``t.name``. + + Returns the deletion count. Called only on tables in + ``TABLES_WITH_NULLABLE_FKS``. Required because primary-key columns must be + NOT NULL; the PK-add downstream would fail with a cryptic constraint + violation if any NULL-FK rows survived. + """ + # Identifiers come from the AFFECTED_TABLES whitelist, not user input. + sql = sa.text( + f"DELETE FROM {t.name} WHERE {t.fk1} IS NULL OR {t.fk2} IS NULL" # noqa: S608 + ) + result = conn.execute(sql) + n = result.rowcount or 0 + if n: + logger.warning( + "Deleted %d row(s) with NULL FK from %s before composite-PK promotion", + n, + t.name, + ) + return n + + +def _dedupe_by_min_id(conn: Connection, t: AssociationTable) -> int: + """Delete duplicate ``(t.fk1, t.fk2)`` rows from ``t.name`` keeping ``MIN(id)``. + + Returns the deletion count. Uses the wrapped-subquery form for MySQL + portability — MySQL rejects ``DELETE FROM t WHERE id NOT IN (SELECT MIN(id) + FROM t GROUP BY ...)`` with ERROR 1093 unless the inner SELECT is wrapped + to force materialization. + """ + # Identifiers come from the AFFECTED_TABLES whitelist, not user input. 
+ sql = sa.text( + f"DELETE FROM {t.name} WHERE id NOT IN (" # noqa: S608 + f" SELECT keep_id FROM (" + f" SELECT MIN(id) AS keep_id FROM {t.name} " + f"GROUP BY {t.fk1}, {t.fk2}" + f" ) AS s" + f")" + ) + result = conn.execute(sql) + n = result.rowcount or 0 + if n: + logger.warning("Deduped %d duplicate row(s) from %s", n, t.name) + return n + + +def _assert_no_duplicates(conn: Connection, t: AssociationTable) -> None: + """Raise ``RuntimeError`` if any ``(t.fk1, t.fk2)`` duplicate group remains. + + Called after ``_dedupe_by_min_id`` to surface silent dialect-dependent + dedupe failures (e.g., a MySQL syntax issue) as an actionable error + before the PK-add fires with a less-helpful constraint-violation message. + """ + # Identifiers come from the AFFECTED_TABLES whitelist, not user input. + sql = sa.text( + f"SELECT COUNT(*) FROM (" # noqa: S608 + f" SELECT 1 FROM {t.name} GROUP BY {t.fk1}, {t.fk2} HAVING COUNT(*) > 1" + f") AS s" + ) + if remaining := conn.scalar(sql) or 0: + raise RuntimeError( + f"Dedupe failed for {t.name}: {remaining} duplicate " + f"({t.fk1}, {t.fk2}) groups remain after _dedupe_by_min_id. " + f"Check the dedupe SQL for dialect {conn.dialect.name}." + ) + + +def _build_pre_upgrade_table( + insp: sa.engine.reflection.Inspector, t: AssociationTable +) -> sa.Table: + """Build a ``Table`` object representing the pre-upgrade schema of ``t``, + explicitly *without* any redundant ``UniqueConstraint(t.fk1, t.fk2)``. + Used as ``copy_from`` to ``batch_alter_table`` so the rebuilt table + omits the unnamed UNIQUE constraint deterministically across dialects + (SQLite reflects unnamed UNIQUEs with ``name=None``, defeating the + standard ``batch_op.drop_constraint(name)`` path). + + Reflects column types and FK targets (with original FK constraint names + preserved) from the live database; only the redundant UNIQUE is omitted. 
+ """ + md = sa.MetaData() + fks_for_col: dict[str, list[dict]] = {} + for fk in insp.get_foreign_keys(t.name): + for col_name in fk["constrained_columns"]: + fks_for_col.setdefault(col_name, []).append(fk) + + cols: list[sa.Column] = [] + for c in insp.get_columns(t.name): + col_kwargs = {"nullable": c.get("nullable", True)} + if c["name"] == "id": + col_kwargs["primary_key"] = True + col_kwargs["autoincrement"] = True + fk_args = [] + for fk in fks_for_col.get(c["name"], []): + idx = fk["constrained_columns"].index(c["name"]) + target = f"{fk['referred_table']}.{fk['referred_columns'][idx]}" + options = {} + if fk.get("options", {}).get("ondelete"): + options["ondelete"] = fk["options"]["ondelete"] + if fk.get("name"): + options["name"] = fk["name"] + fk_args.append(sa.ForeignKey(target, **options)) + cols.append(sa.Column(c["name"], c["type"], *fk_args, **col_kwargs)) + return sa.Table(t.name, md, *cols) + + +def upgrade() -> None: + conn = op.get_bind() + _check_no_external_fks_to_id(conn) + insp = inspect(conn) + + for t in AFFECTED_TABLES: + if t.name in TABLES_WITH_NULLABLE_FKS: + _delete_null_fk_rows(conn, t) + _dedupe_by_min_id(conn, t) + _assert_no_duplicates(conn, t) + + # For the two tables with a pre-existing redundant UNIQUE + # (``dashboard_slices``, ``report_schedule_user``) build an explicit + # ``copy_from`` Table that omits the UNIQUE; this deterministically + # drops it across all dialects, including SQLite where unnamed + # constraints reflect with ``name=None`` and can't be dropped by + # name. For the other six tables, reflection-based default + # ``batch_alter_table`` (auto-detect) is fine since there's no + # UNIQUE to drop. On PostgreSQL/MySQL, direct ALTER avoids the + # temp-table index-name collision; on SQLite, the auto-detect picks + # ``recreate=True`` because PK changes need it. 
+ if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + with op.batch_alter_table( + t.name, + recreate="always", + copy_from=_build_pre_upgrade_table(insp, t), + ) as batch_op: + batch_op.drop_column("id") + batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + else: + with op.batch_alter_table(t.name) as batch_op: + batch_op.drop_column("id") + batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + + +def downgrade() -> None: + # Inverse order: undo upgrade transformations from last-applied to + # first-applied. Within each table, drop the composite PK, restore the + # surrogate ``id`` column, and re-add the original ``UNIQUE`` constraint + # on the two tables that previously carried one. + # + # Note: FK columns remain NOT NULL after downgrade (intentional asymmetry + # — see UPDATING.md). Restoring the original nullable state would require + # an explicit ``alter_column`` per FK per table for no operator value; + # junction-table NULL FKs were always meaningless under ``secondary=`` + # semantics. + # The downgrade names the restored PK ``
{table}_pkey`` (matching Postgres' + # default constraint-naming convention, which was the original constraint + # name before this migration ran) so a downgrade-then-upgrade round-trip + # doesn't collide on the upgrade's ``pk_{table}
`` name. + # + # Adding a NOT NULL ``id`` column to a table with existing rows requires + # a default that fires on the existing rows. ``sa.Identity()`` (Postgres + # 10+ / MySQL 8+) and ``sa.Sequence`` (with explicit nextval) both + # backfill existing rows during ALTER TABLE; bare ``autoincrement=True`` + # does not. ``Identity`` is the modern portable choice. + for t in reversed(AFFECTED_TABLES): + with op.batch_alter_table(t.name) as batch_op: + batch_op.drop_constraint(f"pk_{t.name}", type_="primary") + batch_op.add_column( + sa.Column( + "id", + sa.Integer, + sa.Identity(always=False), + nullable=False, + ) + ) + batch_op.create_primary_key(f"{t.name}_pkey", ["id"]) + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + batch_op.create_unique_constraint( + f"uq_{t.name}_{t.fk1}_{t.fk2}", [t.fk1, t.fk2] + ) diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index f38d801719ea..bb1711678b70 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -35,7 +35,6 @@ String, Table, Text, - UniqueConstraint, ) from sqlalchemy.engine.base import Connection from sqlalchemy.orm import relationship, subqueryload @@ -93,37 +92,53 @@ def copy_dashboard(_mapper: Mapper, _connection: Connection, target: Dashboard) dashboard_slices = Table( "dashboard_slices", metadata, - Column("id", Integer, primary_key=True), - Column("dashboard_id", Integer, ForeignKey("dashboards.id", ondelete="CASCADE")), - Column("slice_id", Integer, ForeignKey("slices.id", ondelete="CASCADE")), - UniqueConstraint("dashboard_id", "slice_id"), + Column( + "dashboard_id", + Integer, + ForeignKey("dashboards.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "slice_id", + Integer, + ForeignKey("slices.id", ondelete="CASCADE"), + primary_key=True, + ), ) dashboard_user = Table( "dashboard_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("dashboard_id", Integer, 
ForeignKey("dashboards.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "dashboard_id", + Integer, + ForeignKey("dashboards.id", ondelete="CASCADE"), + primary_key=True, + ), ) DashboardRoles = Table( "dashboard_roles", metadata, - Column("id", Integer, primary_key=True), Column( "dashboard_id", Integer, ForeignKey("dashboards.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), Column( "role_id", Integer, ForeignKey("ab_role.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), ) diff --git a/superset/models/slice.py b/superset/models/slice.py index 04c698ce95da..0012d082efb9 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -58,9 +58,18 @@ slice_user = Table( "slice_user", metadata, - Column("id", Integer, primary_key=True), - Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")), - Column("slice_id", Integer, ForeignKey("slices.id", ondelete="CASCADE")), + Column( + "user_id", + Integer, + ForeignKey("ab_user.id", ondelete="CASCADE"), + primary_key=True, + ), + Column( + "slice_id", + Integer, + ForeignKey("slices.id", ondelete="CASCADE"), + primary_key=True, + ), ) logger = logging.getLogger(__name__) diff --git a/superset/reports/models.py b/superset/reports/models.py index f0abda8a9216..7564336ae11d 100644 --- a/superset/reports/models.py +++ b/superset/reports/models.py @@ -101,20 +101,18 @@ class ReportSourceFormat(StrEnum): report_schedule_user = Table( "report_schedule_user", metadata, - Column("id", Integer, primary_key=True), Column( "user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), Column( "report_schedule_id", Integer, ForeignKey("report_schedule.id", ondelete="CASCADE"), - nullable=False, + primary_key=True, ), - UniqueConstraint("user_id", "report_schedule_id"), ) diff --git 
a/tests/integration_tests/migrations/composite_pk_association_tables__tests.py b/tests/integration_tests/migrations/composite_pk_association_tables__tests.py new file mode 100644 index 000000000000..52b1942bdb24 --- /dev/null +++ b/tests/integration_tests/migrations/composite_pk_association_tables__tests.py @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Schema-shape assertion tests for the composite-PK association-tables +migration (revision 2bee73611e32). + +Builds the pre-migration shape against an isolated in-memory SQLite engine, +runs the migration's ``upgrade()``, and asserts the resulting shape matches +the data-model.md "After" specification: no ``id`` column, composite PK on +the two FK columns, and no redundant ``UNIQUE(fk1, fk2)`` on the two tables +that previously carried one. + +Continuum-restore verification is OUT OF SCOPE; that work lives in the +versioning epic (sc-103156). Cross-backend verification (PostgreSQL, MySQL) +is handled by the CI matrix (T034a). 
+""" + +from importlib import import_module + +import pytest +import sqlalchemy as sa +from alembic.migration import MigrationContext +from alembic.operations import Operations +from sqlalchemy import inspect + +# Import the migration module under test. +_migration = import_module( + "superset.migrations.versions." + "2026-05-01_23-36_2bee73611e32_composite_pk_association_tables" +) +AFFECTED_TABLES = _migration.AFFECTED_TABLES +TABLES_WITH_PRE_EXISTING_UNIQUE = _migration.TABLES_WITH_PRE_EXISTING_UNIQUE + + +@pytest.fixture(scope="module") +def post_upgrade_engine() -> sa.engine.Engine: + """An isolated in-memory SQLite engine with the migration applied to a + pre-migration-shaped seed schema. Used by the post-upgrade assertions + below. Module-scoped so the upgrade only runs once per test session.""" + engine = sa.create_engine("sqlite:///:memory:") + md = sa.MetaData() + for t in AFFECTED_TABLES: + cols: list[sa.SchemaItem] = [ + sa.Column("id", sa.Integer, primary_key=True), + sa.Column(t.fk1, sa.Integer, nullable=False), + sa.Column(t.fk2, sa.Integer, nullable=False), + ] + constraints: list[sa.SchemaItem] = [] + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + constraints.append(sa.UniqueConstraint(t.fk1, t.fk2)) + sa.Table(t.name, md, *cols, *constraints) + md.create_all(engine) + + # Apply the migration's upgrade() against this engine via Alembic's + # MigrationContext, patching the migration module's ``op`` reference. 
+ with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + ops = Operations(ctx) + original_op = _migration.op + _migration.op = ops # type: ignore[attr-defined] + try: + _migration.upgrade() + finally: + _migration.op = original_op # type: ignore[attr-defined] + return engine + + +@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_no_id_column(post_upgrade_engine: sa.engine.Engine, t) -> None: + """The synthetic ``id`` column is gone from each affected table.""" + insp = inspect(post_upgrade_engine) + column_names = {c["name"] for c in insp.get_columns(t.name)} + assert "id" not in column_names, ( + f"{t.name} still has an 'id' column after migration; " + f"composite-PK conversion incomplete" + ) + + +@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_primary_key_is_composite_fks(post_upgrade_engine: sa.engine.Engine, t) -> None: + """The primary key of each affected table is exactly ``(fk1, fk2)``.""" + insp = inspect(post_upgrade_engine) + pk_cols = set(insp.get_pk_constraint(t.name).get("constrained_columns", [])) + assert pk_cols == {t.fk1, t.fk2}, ( + f"{t.name} primary key is {pk_cols}, expected {{{t.fk1}, {t.fk2}}}" + ) + + +@pytest.mark.parametrize( + "t", + [t for t in AFFECTED_TABLES if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE], + ids=lambda t: t.name, +) +def test_redundant_unique_dropped(post_upgrade_engine: sa.engine.Engine, t) -> None: + """For the two tables that previously carried a UNIQUE(fk1, fk2), that + constraint is now subsumed by the composite PK and must not appear + separately in the unique-constraint list.""" + insp = inspect(post_upgrade_engine) + redundant_pair = {t.fk1, t.fk2} + for uc in insp.get_unique_constraints(t.name): + cols = set(uc.get("column_names", [])) + assert cols != redundant_pair, ( + f"{t.name} still carries a redundant UniqueConstraint over " + f"{redundant_pair} (name={uc.get('name')!r}); " + f"composite-PK conversion incomplete" + ) + + 
+@pytest.mark.parametrize("t", AFFECTED_TABLES, ids=lambda t: t.name) +def test_fk_columns_not_null(post_upgrade_engine: sa.engine.Engine, t) -> None: + """PK promotion implicitly tightens the FK columns to NOT NULL.""" + insp = inspect(post_upgrade_engine) + cols_by_name = {c["name"]: c for c in insp.get_columns(t.name)} + for col in (t.fk1, t.fk2): + assert col in cols_by_name, f"{t.name} missing column {col}" + assert cols_by_name[col].get("nullable") is False, ( + f"{t.name}.{col} is nullable; expected NOT NULL after PK promotion" + ) diff --git a/tests/integration_tests/migrations/composite_pk_round_trip__tests.py b/tests/integration_tests/migrations/composite_pk_round_trip__tests.py new file mode 100644 index 000000000000..d83c9d113c3f --- /dev/null +++ b/tests/integration_tests/migrations/composite_pk_round_trip__tests.py @@ -0,0 +1,168 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Schema round-trip tests for the composite-PK association-tables migration +(revision 2bee73611e32). 
Builds the pre-migration shape against an in-memory +SQLite engine, runs the migration's ``upgrade()``, asserts the post-upgrade +shape, runs ``downgrade()``, asserts the prior shape is restored (modulo the +documented FK NOT NULL asymmetry), and re-runs ``upgrade()`` to verify +idempotency. + +This is run against an isolated in-memory engine via Alembic's +``MigrationContext`` so the test does not perturb the project's test DB. + +Cross-backend verification of the same migration against PostgreSQL and +MySQL is delegated to the CI matrix (see T034a in tasks.md) and to the +quickstart.md verification (T033). This file covers the SQLite slice. +""" + +from importlib import import_module +from typing import Any + +import pytest +import sqlalchemy as sa +from alembic.migration import MigrationContext +from alembic.operations import Operations +from sqlalchemy import inspect + +# Import the migration module under test. +_migration = import_module( + "superset.migrations.versions." + "2026-05-01_23-36_2bee73611e32_composite_pk_association_tables" +) +AFFECTED_TABLES = _migration.AFFECTED_TABLES +TABLES_WITH_PRE_EXISTING_UNIQUE = _migration.TABLES_WITH_PRE_EXISTING_UNIQUE + + +def _build_pre_migration_schema(engine: sa.engine.Engine) -> None: + """Recreate the eight tables in their pre-migration shape (surrogate + ``id INTEGER PRIMARY KEY`` plus an optional ``UNIQUE(fk1, fk2)`` on the + two tables that previously carried one). 
FKs to parent tables are + omitted to keep the test self-contained — we're testing schema + transformations, not FK enforcement.""" + md = sa.MetaData() + for t in AFFECTED_TABLES: + cols: list[sa.Column] = [ + sa.Column("id", sa.Integer, primary_key=True), + sa.Column(t.fk1, sa.Integer, nullable=False), + sa.Column(t.fk2, sa.Integer, nullable=False), + ] + constraints: list[sa.SchemaItem] = [] + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + constraints.append(sa.UniqueConstraint(t.fk1, t.fk2)) + sa.Table(t.name, md, *cols, *constraints) + md.create_all(engine) + + +def _shape(engine: sa.engine.Engine, table: str) -> dict[str, Any]: + """Return a structural summary for asserting equality across runs.""" + insp = inspect(engine) + pk = insp.get_pk_constraint(table).get("constrained_columns", []) + columns = sorted(c["name"] for c in insp.get_columns(table)) + uniques = sorted( + tuple(sorted(uc.get("column_names", []))) + for uc in insp.get_unique_constraints(table) + ) + return {"columns": columns, "pk": sorted(pk), "uniques": uniques} + + +def _run_with_alembic_context(engine: sa.engine.Engine, fn) -> None: + """Run ``fn()`` (the migration's upgrade/downgrade body) inside a fresh + Alembic ``MigrationContext`` bound to ``engine``. Patches the + migration module's ``op`` to point at this context so its + ``op.get_bind()`` and ``op.batch_alter_table`` calls execute against + the in-memory engine.""" + with engine.connect() as conn: + ctx = MigrationContext.configure(conn) + ops = Operations(ctx) + original_op = _migration.op + _migration.op = ops # type: ignore[attr-defined] + try: + fn() + finally: + _migration.op = original_op # type: ignore[attr-defined] + + +def test_round_trip_against_in_memory_sqlite() -> None: + """Round-trip: pre-migration → upgrade → downgrade → upgrade again. + + Asserts: + - Post-upgrade shape: no ``id``, composite PK on (fk1, fk2), no + UNIQUE(fk1, fk2) on the two tables that previously carried one. 
+ - Post-downgrade shape: ``id`` restored, PK back on (id), UNIQUE + re-added on the two tables. (FK columns remain NOT NULL — the + documented intentional asymmetry.) + - Post-re-upgrade idempotency: shape matches the first post-upgrade. + """ + engine = sa.create_engine("sqlite:///:memory:") + _build_pre_migration_schema(engine) + + pre_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + + _run_with_alembic_context(engine, _migration.upgrade) + + for t in AFFECTED_TABLES: + s = _shape(engine, t.name) + assert "id" not in s["columns"], f"{t.name}: id still present post-upgrade: {s}" + assert s["pk"] == sorted([t.fk1, t.fk2]), ( + f"{t.name}: PK is {s['pk']}, expected {sorted([t.fk1, t.fk2])}" + ) + assert tuple(sorted([t.fk1, t.fk2])) not in s["uniques"], ( + f"{t.name}: redundant UNIQUE not dropped post-upgrade: {s['uniques']}" + ) + + post_upgrade_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + + _run_with_alembic_context(engine, _migration.downgrade) + + for t in AFFECTED_TABLES: + s = _shape(engine, t.name) + assert "id" in s["columns"], f"{t.name}: id not restored post-downgrade: {s}" + assert s["pk"] == ["id"], f"{t.name}: PK is {s['pk']}, expected ['id']" + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + assert tuple(sorted([t.fk1, t.fk2])) in s["uniques"], ( + f"{t.name}: UNIQUE not restored post-downgrade: {s['uniques']}" + ) + + _run_with_alembic_context(engine, _migration.upgrade) + + re_upgrade_shape = {t.name: _shape(engine, t.name) for t in AFFECTED_TABLES} + assert re_upgrade_shape == post_upgrade_shape, ( + "Re-upgrade shape differs from initial upgrade shape — " + "migration is not idempotent. " + f"diff: {set(re_upgrade_shape.items()) ^ set(post_upgrade_shape.items())}" + ) + + # Use pre_shape only to demonstrate it was captured (not asserted against + # because the round-trip downgrade intentionally diverges on FK NOT NULL). 
+ _ = pre_shape + + +def test_migration_module_constants_are_consistent() -> None: + """Sanity-check the migration module's exported constants. Catches + accidental edits that misalign AFFECTED_TABLES with the auxiliary sets.""" + affected_names = {t.name for t in AFFECTED_TABLES} + assert _migration.TABLES_WITH_PRE_EXISTING_UNIQUE.issubset(affected_names) + assert _migration.TABLES_WITH_NULLABLE_FKS.issubset(affected_names) + # Order is alphabetical (deterministic for review/bisection). + assert [t.name for t in AFFECTED_TABLES] == sorted(affected_names) + + +@pytest.mark.skipif(True, reason="placeholder — see test_round_trip above") +def test_placeholder_for_future_postgres_round_trip() -> None: + """Reserved slot for a future Postgres-specific round-trip if local + SQLite divergence ever needs to be cross-checked against the real + backend. Today's CI matrix (T034a) handles this implicitly.""" diff --git a/tests/unit_tests/migrations/composite_pk_association_tables_test.py b/tests/unit_tests/migrations/composite_pk_association_tables_test.py new file mode 100644 index 000000000000..6c3115edaf65 --- /dev/null +++ b/tests/unit_tests/migrations/composite_pk_association_tables_test.py @@ -0,0 +1,132 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for the composite-PK association-tables migration (revision +2bee73611e32). Verifies the post-migration constraint enforcement: duplicate +``(fk1, fk2)`` insertions fail with IntegrityError, distinct pairs succeed. + +Schema is built from the live ORM ``Table`` definitions via +``metadata.create_all(engine)`` against in-memory SQLite. This reflects the +post-T015–T018 ORM model state (composite-PK), independent of whether the +Alembic migration has run against the test DB. The two should agree. +""" + +import pytest +import sqlalchemy as sa +from sqlalchemy.exc import IntegrityError + +# (table_name, fk1_col, fk2_col, fk1_parent_table, fk2_parent_table) +# Parent-table names are needed to build the FK targets in the in-memory schema. +AFFECTED_TABLES = [ + ("dashboard_roles", "dashboard_id", "role_id", "dashboards", "ab_role"), + ("dashboard_slices", "dashboard_id", "slice_id", "dashboards", "slices"), + ("dashboard_user", "user_id", "dashboard_id", "ab_user", "dashboards"), + ( + "report_schedule_user", + "user_id", + "report_schedule_id", + "ab_user", + "report_schedule", + ), + ("rls_filter_roles", "role_id", "rls_filter_id", "ab_role", "rls_filter"), + ("rls_filter_tables", "table_id", "rls_filter_id", "tables", "rls_filter"), + ("slice_user", "user_id", "slice_id", "ab_user", "slices"), + ("sqlatable_user", "user_id", "table_id", "ab_user", "tables"), +] + + +def _build_in_memory_schema( + table_name: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> tuple[sa.engine.Engine, sa.Table]: + """Build an in-memory SQLite schema with two minimal parent tables and + the junction table under test (composite-PK shape). 
Returns the engine + and the junction-table object for inserts.""" + metadata = sa.MetaData() + sa.Table( + fk1_parent, + metadata, + sa.Column("id", sa.Integer, primary_key=True), + ) + if fk2_parent != fk1_parent: + sa.Table( + fk2_parent, + metadata, + sa.Column("id", sa.Integer, primary_key=True), + ) + junction = sa.Table( + table_name, + metadata, + sa.Column( + fk1, + sa.Integer, + sa.ForeignKey(f"{fk1_parent}.id"), + primary_key=True, + ), + sa.Column( + fk2, + sa.Integer, + sa.ForeignKey(f"{fk2_parent}.id"), + primary_key=True, + ), + ) + engine = sa.create_engine("sqlite:///:memory:") + metadata.create_all(engine) + # Seed parent rows so the FK constraints can be satisfied. + # Identifiers come from the AFFECTED_TABLES test parameter list, not user input. + with engine.begin() as conn: + conn.execute( + sa.text(f"INSERT INTO {fk1_parent} (id) VALUES (1), (2)") # noqa: S608 + ) + if fk2_parent != fk1_parent: + conn.execute( + sa.text(f"INSERT INTO {fk2_parent} (id) VALUES (1), (2)") # noqa: S608 + ) + return engine, junction + + +@pytest.mark.parametrize("table,fk1,fk2,fk1_parent,fk2_parent", AFFECTED_TABLES) +def test_duplicate_insert_rejected( + table: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> None: + """Inserting the same ``(fk1, fk2)`` pair twice raises ``IntegrityError``. + + Verifies SC-004 / FR-007 — the composite primary key enforces uniqueness + at the database level on every affected table. + """ + engine, junction = _build_in_memory_schema(table, fk1, fk2, fk1_parent, fk2_parent) + with engine.begin() as conn: + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + with pytest.raises(IntegrityError): + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + + +@pytest.mark.parametrize("table,fk1,fk2,fk1_parent,fk2_parent", AFFECTED_TABLES) +def test_distinct_pairs_accepted( + table: str, fk1: str, fk2: str, fk1_parent: str, fk2_parent: str +) -> None: + """Two distinct ``(fk1, fk2)`` pairs both succeed. 
+ + Sanity check that the PK isn't accidentally a single-column constraint + (which would reject ``(1, 1)`` and ``(1, 2)`` as a duplicate on column 1). + """ + engine, junction = _build_in_memory_schema(table, fk1, fk2, fk1_parent, fk2_parent) + with engine.begin() as conn: + conn.execute(junction.insert().values({fk1: 1, fk2: 1})) + conn.execute(junction.insert().values({fk1: 1, fk2: 2})) + result = conn.execute( + sa.text(f"SELECT COUNT(*) FROM {table}") # noqa: S608 + ).scalar_one() + assert result == 2 From c8bfd1cc4d9f2f6967607837a40ad74d48654dcd Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Mon, 4 May 2026 10:14:59 -0600 Subject: [PATCH 2/8] fix(migration): always run NULL-FK cleanup; correct RLS test parent name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cleanups from PR review: 1. ``dashboard_roles.dashboard_id`` was created nullable in revision e11ccdd12658 but was missing from ``TABLES_WITH_NULLABLE_FKS``. A production database with a stray NULL ``dashboard_id`` row would have failed the PK-add with a cryptic constraint violation. Fix by running the NULL-FK cleanup on every affected table — it is a no-op DELETE on tables whose FK columns are already NOT NULL, and it eliminates the risk of further drift in the hardcoded set. ``dashboard_roles`` is added to the documentation set; the runtime now does not consult it. 2. The unit-test parent-table name for ``rls_filter_roles`` and ``rls_filter_tables`` was ``rls_filter`` (does not exist) instead of the real parent ``row_level_security_filters``. Test passes either way (the in-memory FK is self-consistent), but the parameter is now accurate. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ...3611e32_composite_pk_association_tables.py | 20 +++++++++++++------ .../composite_pk_association_tables_test.py | 16 +++++++++++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index 2c841bc6171a..ec637de0118d 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -74,11 +74,15 @@ class AssociationTable(NamedTuple): "report_schedule_user", } -# Six tables whose FK columns are nullable today. Promoting an FK to a primary -# key column makes it NOT NULL, so any existing NULL-FK rows would block the -# PK-add. We delete them in pre-flight (a junction-table row with a NULL FK -# is meaningless under SQLAlchemy ``secondary=`` semantics anyway). +# Tables whose FK columns are nullable in their original create_table +# migrations. ``dashboard_roles.dashboard_id`` (created in revision +# e11ccdd12658) is nullable; ``report_schedule_user`` is the only association +# table that was created with both FK columns ``NOT NULL``. The pre-flight +# NULL-FK cleanup is a cheap no-op DELETE when run against tables whose FKs +# are already NOT NULL, so we run it on every affected table to avoid drift +# bugs from this set going stale. 
TABLES_WITH_NULLABLE_FKS: set[str] = { + "dashboard_roles", "dashboard_slices", "dashboard_user", "rls_filter_roles", @@ -221,8 +225,12 @@ def upgrade() -> None: insp = inspect(conn) for t in AFFECTED_TABLES: - if t.name in TABLES_WITH_NULLABLE_FKS: - _delete_null_fk_rows(conn, t) + # Run NULL-FK cleanup unconditionally: it is a no-op DELETE on tables + # whose FK columns are already NOT NULL (cheap), and skipping it on a + # table whose FK was nullable would leave the PK-add to fail with a + # cryptic constraint violation. Cf. ``TABLES_WITH_NULLABLE_FKS`` above + # for documentation of which tables are known to have nullable FKs. + _delete_null_fk_rows(conn, t) _dedupe_by_min_id(conn, t) _assert_no_duplicates(conn, t) diff --git a/tests/unit_tests/migrations/composite_pk_association_tables_test.py b/tests/unit_tests/migrations/composite_pk_association_tables_test.py index 6c3115edaf65..05a69293a23b 100644 --- a/tests/unit_tests/migrations/composite_pk_association_tables_test.py +++ b/tests/unit_tests/migrations/composite_pk_association_tables_test.py @@ -41,8 +41,20 @@ "ab_user", "report_schedule", ), - ("rls_filter_roles", "role_id", "rls_filter_id", "ab_role", "rls_filter"), - ("rls_filter_tables", "table_id", "rls_filter_id", "tables", "rls_filter"), + ( + "rls_filter_roles", + "role_id", + "rls_filter_id", + "ab_role", + "row_level_security_filters", + ), + ( + "rls_filter_tables", + "table_id", + "rls_filter_id", + "tables", + "row_level_security_filters", + ), ("slice_user", "user_id", "slice_id", "ab_user", "slices"), ("sqlatable_user", "user_id", "table_id", "ab_user", "tables"), ] From a35d8f3b37ade6e4b3a7a64687837bb2006a47d5 Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Mon, 4 May 2026 10:38:40 -0600 Subject: [PATCH 3/8] docs(migration): address SQLAlchemy review follow-ups Four operator-experience improvements from the second review pass: 1. 
``TABLES_WITH_NULLABLE_FKS`` is now explicitly documented as an informational set that is not consulted at runtime; the comment explains the previous ``dashboard_roles`` omission was the bug that motivated the always-run cleanup. 2. ``_delete_null_fk_rows`` docstring updated to match the "always run" semantics (was still claiming "called only on tables in TABLES_WITH_NULLABLE_FKS"). 3. ``_check_no_external_fks_to_id`` now documents its scope limitation: ``Inspector.get_table_names()`` returns the default schema only, so cross-schema FKs in non-standard multi-schema PostgreSQL deployments would not be caught. The single-schema case (Superset's documented deployment) is fully covered. 4. ``_dedupe_by_min_id`` now logs a sample of up to 10 discarded ``(fk1, fk2, id)`` tuples at WARN before deletion, so operators can audit which rows the ``MIN(id)`` policy drops. The keep- original policy is correct in practice but discards later re-grants on ownership tables; the sample makes that visible. 5. ``UPDATING.md`` documents the upgrade/downgrade primary-key name divergence (``pk_
<table>`` vs ``<table>
_pkey``) so operators using schema-comparison tools don't mistake it for migration drift. No schema or runtime-behaviour changes. All 44 migration tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- UPDATING.md | 2 + ...3611e32_composite_pk_association_tables.py | 59 +++++++++++++++---- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 1ece31a1c6fd..6bcb75136142 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -357,6 +357,8 @@ SELECT COUNT(*) FROM sqlatable_user WHERE user_id IS NULL OR table_id IS NULL; **Intentional downgrade asymmetry.** The migration's `downgrade()` restores the surrogate `id` column and (for `dashboard_slices` and `report_schedule_user`) the original `UNIQUE (fk1, fk2)` constraint, but it does **not** restore the original `NULL`-allowed state on the FK columns — they remain `NOT NULL`. This is intentional: under SQLAlchemy's `secondary=` semantics, a `NULL` in either FK column of a junction table is meaningless (it cannot participate in either side of the relationship). Operators downgrading are not expected to need this restored. The asymmetry is documented for completeness so that round-trip schema diffs are not mistaken for migration bugs. +**Constraint-name divergence between upgrade and downgrade.** The composite primary key created on upgrade is named `pk_
<table>` (Alembic's default for `op.create_primary_key("pk_<table>
", ...)`), while the surrogate `id` primary key restored on downgrade is named `<table>
_pkey` (PostgreSQL's default convention for `PrimaryKeyConstraint("id")`). The two names alternate so that a round-trip (upgrade → downgrade → upgrade) does not collide on a pre-existing constraint name. Operators using schema-comparison tools (e.g. `pg_diff`, `migra`) against a downgraded database may see this as drift versus a fresh-install schema. It is cosmetic — no application code references either constraint name. + ## 6.0.0 - [33055](https://github.com/apache/superset/pull/33055): Upgrades Flask-AppBuilder to 5.0.0. The AUTH_OID authentication type has been deprecated and is no longer available as an option in Flask-AppBuilder. OpenID (OID) is considered a deprecated authentication protocol - if you are using AUTH_OID, you will need to migrate to an alternative authentication method such as OAuth, LDAP, or database authentication before upgrading. - [34871](https://github.com/apache/superset/pull/34871): Fixed Jest test hanging issue from Ant Design v5 upgrade. MessageChannel is now mocked in test environment to prevent rc-overflow from causing Jest to hang. Test environment only - no production impact. diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index ec637de0118d..398e96cb755f 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -74,13 +74,15 @@ class AssociationTable(NamedTuple): "report_schedule_user", } -# Tables whose FK columns are nullable in their original create_table -# migrations. ``dashboard_roles.dashboard_id`` (created in revision -# e11ccdd12658) is nullable; ``report_schedule_user`` is the only association -# table that was created with both FK columns ``NOT NULL``. 
The pre-flight -# NULL-FK cleanup is a cheap no-op DELETE when run against tables whose FKs -# are already NOT NULL, so we run it on every affected table to avoid drift -# bugs from this set going stale. +# Documentation set: tables whose FK columns are nullable in their original +# create_table migrations (``dashboard_roles.dashboard_id`` from revision +# e11ccdd12658 is the most recent addition). ``report_schedule_user`` is the +# only affected table created with both FK columns ``NOT NULL`` and is +# intentionally absent here. This set is no longer consulted at runtime — the +# upgrade now runs the NULL-FK cleanup on every affected table because the +# DELETE is a cheap no-op when the columns are already NOT NULL, and that +# eliminates the risk of bugs from this set going stale (the +# ``dashboard_roles`` omission caught in PR review was exactly that bug). TABLES_WITH_NULLABLE_FKS: set[str] = { "dashboard_roles", "dashboard_slices", @@ -95,7 +97,18 @@ class AssociationTable(NamedTuple): def _check_no_external_fks_to_id(conn: Connection) -> None: """Raise ``RuntimeError`` if any foreign key in the database references one of the eight junction-table ``id`` columns. Uses SQLAlchemy's ``Inspector`` - for dialect-agnostic introspection across PostgreSQL, MySQL, and SQLite.""" + for dialect-agnostic introspection across PostgreSQL, MySQL, and SQLite. + + Scope limitation: ``Inspector.get_table_names()`` returns tables in the + connection's default schema only. On PostgreSQL deployments where Superset + metadata lives in a non-default schema, or on multi-schema deployments + that allow cross-schema FKs, an external FK in another schema would not + be detected. This is acceptable for the standard single-schema + deployment that Superset documents; operators with multi-schema + metadata should run the equivalent inventory query against + ``information_schema.referential_constraints`` themselves before + applying. 
+ """ affected = {t.name for t in AFFECTED_TABLES} insp = inspect(conn) for table_name in insp.get_table_names(): @@ -115,10 +128,10 @@ def _check_no_external_fks_to_id(conn: Connection) -> None: def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: """Delete rows where ``t.fk1`` or ``t.fk2`` is NULL on ``t.name``. - Returns the deletion count. Called only on tables in - ``TABLES_WITH_NULLABLE_FKS``. Required because primary-key columns must be + Returns the deletion count. Required because primary-key columns must be NOT NULL; the PK-add downstream would fail with a cryptic constraint - violation if any NULL-FK rows survived. + violation if any NULL-FK rows survived. Run unconditionally on every + affected table — see ``TABLES_WITH_NULLABLE_FKS`` above for the rationale. """ # Identifiers come from the AFFECTED_TABLES whitelist, not user input. sql = sa.text( @@ -142,8 +155,22 @@ def _dedupe_by_min_id(conn: Connection, t: AssociationTable) -> int: portability — MySQL rejects ``DELETE FROM t WHERE id NOT IN (SELECT MIN(id) FROM t GROUP BY ...)`` with ERROR 1093 unless the inner SELECT is wrapped to force materialization. + + Logs a sample (up to 10) of the discarded ``(fk1, fk2, id)`` tuples at + WARN before deletion, so operators can audit which rows are dropped — the + "keep ``MIN(id)``" policy preserves the original row, which is correct + in practice but discards any later, semantically-identical re-grants. """ # Identifiers come from the AFFECTED_TABLES whitelist, not user input. 
+ sample_sql = sa.text( + f"SELECT {t.fk1}, {t.fk2}, id FROM {t.name} WHERE id NOT IN (" # noqa: S608 + f" SELECT keep_id FROM (" + f" SELECT MIN(id) AS keep_id FROM {t.name} " + f"GROUP BY {t.fk1}, {t.fk2}" + f" ) AS s" + f") LIMIT 10" + ) + sample = list(conn.execute(sample_sql)) sql = sa.text( f"DELETE FROM {t.name} WHERE id NOT IN (" # noqa: S608 f" SELECT keep_id FROM (" @@ -155,7 +182,15 @@ def _dedupe_by_min_id(conn: Connection, t: AssociationTable) -> int: result = conn.execute(sql) n = result.rowcount or 0 if n: - logger.warning("Deduped %d duplicate row(s) from %s", n, t.name) + logger.warning( + "Deduped %d duplicate row(s) from %s; sample of discarded " + "(%s, %s, id) tuples (up to 10): %s", + n, + t.name, + t.fk1, + t.fk2, + sample, + ) return n From 5cafed2e08163a73559658443a9c2ab91d960342 Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Mon, 4 May 2026 15:35:14 -0600 Subject: [PATCH 4/8] refactor(migration): build pre-flight SQL via SQLAlchemy core (review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Beto's review comments on apache/superset#39859: replace ``sa.text(f"...")`` SQL construction in the three pre-flight helpers (``_delete_null_fk_rows``, ``_dedupe_by_min_id``, ``_assert_no_duplicates``) with SQLAlchemy core constructs (``sa.delete``, ``sa.select``, ``sa.func``, ``.subquery()``, ``.notin_()``). A small ``_table_clause()`` helper builds a lightweight ``TableClause`` exposing the columns the queries reference; the three helpers consume it. Removes all ``# noqa: S608`` comments — they are no longer needed because there is no string-interpolated SQL. Verified the compiled SQL is identical on Postgres, MySQL, and SQLite, including the MySQL ERROR 1093 workaround (the inner aggregation is wrapped in a derived table via ``.subquery()``, producing ``... NOT IN (SELECT keep_id FROM (SELECT min(id) ...) AS keep_min)``). 
Also drops the redundant ``f`` prefix on the two non-interpolating lines of the ``_check_no_external_fks_to_id`` error message. 44 migration tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...3611e32_composite_pk_association_tables.py | 83 ++++++++++--------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index 398e96cb755f..8a128bfd7461 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -120,11 +120,19 @@ def _check_no_external_fks_to_id(conn: Connection) -> None: f"Cannot drop synthetic id from {fk['referred_table']}: " f"external FK {fk.get('name', '')} on {table_name} " f"references {fk['referred_table']}({fk['referred_columns']}). " - f"Drop or migrate the referencing FK before applying this " - f"migration." + "Drop or migrate the referencing FK before applying this " + "migration." ) +def _table_clause(t: AssociationTable) -> sa.sql.expression.TableClause: + """Build a lightweight SQLAlchemy ``TableClause`` for ``t`` exposing the + columns the helper queries reference (``id``, ``fk1``, ``fk2``). Used so + that the dedupe / cleanup / assert SQL can be expressed via SQLAlchemy + core constructs rather than via string interpolation.""" + return sa.table(t.name, sa.column("id"), sa.column(t.fk1), sa.column(t.fk2)) + + def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: """Delete rows where ``t.fk1`` or ``t.fk2`` is NULL on ``t.name``. @@ -133,11 +141,9 @@ def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: violation if any NULL-FK rows survived. 
Run unconditionally on every affected table — see ``TABLES_WITH_NULLABLE_FKS`` above for the rationale. """ - # Identifiers come from the AFFECTED_TABLES whitelist, not user input. - sql = sa.text( - f"DELETE FROM {t.name} WHERE {t.fk1} IS NULL OR {t.fk2} IS NULL" # noqa: S608 - ) - result = conn.execute(sql) + tbl = _table_clause(t) + stmt = sa.delete(tbl).where(sa.or_(tbl.c[t.fk1].is_(None), tbl.c[t.fk2].is_(None))) + result = conn.execute(stmt) n = result.rowcount or 0 if n: logger.warning( @@ -151,35 +157,35 @@ def _delete_null_fk_rows(conn: Connection, t: AssociationTable) -> int: def _dedupe_by_min_id(conn: Connection, t: AssociationTable) -> int: """Delete duplicate ``(t.fk1, t.fk2)`` rows from ``t.name`` keeping ``MIN(id)``. - Returns the deletion count. Uses the wrapped-subquery form for MySQL - portability — MySQL rejects ``DELETE FROM t WHERE id NOT IN (SELECT MIN(id) - FROM t GROUP BY ...)`` with ERROR 1093 unless the inner SELECT is wrapped - to force materialization. + Returns the deletion count. The ``NOT IN`` argument is wrapped in an + extra ``SELECT keep_id FROM (...) AS s`` derived table because MySQL + rejects ``DELETE FROM t WHERE id NOT IN (SELECT MIN(id) FROM t GROUP BY + ...)`` with ERROR 1093 unless the inner SELECT is materialized through + a derived table. SQLAlchemy's ``.subquery()`` produces that wrap. Logs a sample (up to 10) of the discarded ``(fk1, fk2, id)`` tuples at - WARN before deletion, so operators can audit which rows are dropped — the - "keep ``MIN(id)``" policy preserves the original row, which is correct - in practice but discards any later, semantically-identical re-grants. + WARN before deletion, so operators can audit which rows are dropped — + the "keep ``MIN(id)``" policy preserves the original row, which is + correct in practice but discards any later, semantically-identical + re-grants. """ - # Identifiers come from the AFFECTED_TABLES whitelist, not user input. 
- sample_sql = sa.text( - f"SELECT {t.fk1}, {t.fk2}, id FROM {t.name} WHERE id NOT IN (" # noqa: S608 - f" SELECT keep_id FROM (" - f" SELECT MIN(id) AS keep_id FROM {t.name} " - f"GROUP BY {t.fk1}, {t.fk2}" - f" ) AS s" - f") LIMIT 10" + tbl = _table_clause(t) + + keep_min = ( + sa.select(sa.func.min(tbl.c.id).label("keep_id")) + .group_by(tbl.c[t.fk1], tbl.c[t.fk2]) + .subquery("keep_min") ) - sample = list(conn.execute(sample_sql)) - sql = sa.text( - f"DELETE FROM {t.name} WHERE id NOT IN (" # noqa: S608 - f" SELECT keep_id FROM (" - f" SELECT MIN(id) AS keep_id FROM {t.name} " - f"GROUP BY {t.fk1}, {t.fk2}" - f" ) AS s" - f")" + keep_ids = sa.select(keep_min.c.keep_id) + discarded = tbl.c.id.notin_(keep_ids) + + sample_stmt = ( + sa.select(tbl.c[t.fk1], tbl.c[t.fk2], tbl.c.id).where(discarded).limit(10) ) - result = conn.execute(sql) + sample = list(conn.execute(sample_stmt)) + + delete_stmt = sa.delete(tbl).where(discarded) + result = conn.execute(delete_stmt) n = result.rowcount or 0 if n: logger.warning( @@ -201,13 +207,16 @@ def _assert_no_duplicates(conn: Connection, t: AssociationTable) -> None: dedupe failures (e.g., a MySQL syntax issue) as an actionable error before the PK-add fires with a less-helpful constraint-violation message. """ - # Identifiers come from the AFFECTED_TABLES whitelist, not user input. - sql = sa.text( - f"SELECT COUNT(*) FROM (" # noqa: S608 - f" SELECT 1 FROM {t.name} GROUP BY {t.fk1}, {t.fk2} HAVING COUNT(*) > 1" - f") AS s" + tbl = _table_clause(t) + duplicate_groups = ( + sa.select(sa.literal(1)) + .select_from(tbl) + .group_by(tbl.c[t.fk1], tbl.c[t.fk2]) + .having(sa.func.count() > 1) + .subquery("duplicate_groups") ) - if remaining := conn.scalar(sql) or 0: + count_stmt = sa.select(sa.func.count()).select_from(duplicate_groups) + if remaining := conn.scalar(count_stmt) or 0: raise RuntimeError( f"Dedupe failed for {t.name}: {remaining} duplicate " f"({t.fk1}, {t.fk2}) groups remain after _dedupe_by_min_id. 
" From 48a4694ce8a4290097e86efbec248066b1efe81d Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Mon, 4 May 2026 16:01:58 -0600 Subject: [PATCH 5/8] fix(migration): drop FKs before recreate on MySQL (sc-105349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI test-mysql failed with: MySQLdb.OperationalError: (1826, "Duplicate foreign key constraint name 'fk_dashboard_slices_slice_id_slices'") Root cause: MySQL scopes foreign-key constraint names per-database, not per-table (PostgreSQL and SQLite scope per-table). The ``batch_alter_table(... recreate="always", copy_from=...)`` path used for ``dashboard_slices`` and ``report_schedule_user`` builds ``_alembic_tmp_
`` carrying the original FK names from ``copy_from`` while the original table still holds those names — MySQL rejects the temp-table creation with ERROR 1826. Fix: on MySQL only, drop the original FK constraints by name before the ``batch_alter_table`` runs. The ``copy_from`` re-creates them on the rebuilt table with their original names, so the post-migration shape is unchanged. On PostgreSQL and SQLite the original code path still runs unchanged. Local SQLite tests (44 passed, 1 skipped) still pass; CI will validate on MySQL. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...2bee73611e32_composite_pk_association_tables.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index 8a128bfd7461..8d7b2846d342 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -289,6 +289,20 @@ def upgrade() -> None: # temp-table index-name collision; on SQLite, the auto-detect picks # ``recreate=True`` because PK changes need it. if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + # MySQL ERROR 1826: foreign-key constraint names are unique + # per-database, not per-table. ``recreate="always"`` builds + # ``_alembic_tmp_
`` with the original FK names from + # ``copy_from``, but the original table still holds those + # names until it's dropped, which fails on MySQL with + # ``Duplicate foreign key constraint name``. PostgreSQL and + # SQLite scope FK names per-table, so the recreate path + # works there as-is. Drop the original FKs by name first + # on MySQL; ``copy_from`` re-creates them on the rebuilt + # table with their original names. + if conn.dialect.name == "mysql": + for fk in insp.get_foreign_keys(t.name): + if fk_name := fk.get("name"): + op.drop_constraint(fk_name, t.name, type_="foreignkey") with op.batch_alter_table( t.name, recreate="always", From 27c32af1cb64ff0eb9b4e65f5d8802bba97b084e Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Tue, 5 May 2026 10:41:03 -0600 Subject: [PATCH 6/8] fix(migration): MySQL downgrade FK + AUTO_INCREMENT (sc-105349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two MySQL-only failures in the downgrade path, found by running the full migration history against a fresh MySQL 8 container: 1. ``MySQLdb.OperationalError: (1553, "Cannot drop index 'PRIMARY': needed in a foreign key constraint")``. InnoDB uses the composite PK index to back the FK on the leftmost column. The downgrade tried to drop the composite PK before dropping the FKs, orphaning the FK's backing index. PostgreSQL and SQLite create separate indexes for FK columns and don't trip on this. 2. ``Field 'id' doesn't have a default value`` on subsequent INSERT. ``sa.Identity(always=False)`` only emits ``AUTO_INCREMENT`` on MySQL when the column is created with ``primary_key=True`` — our portable path adds the column first then creates the PK separately, so MySQL leaves the column without auto-generation. Existing rows would all collide on id=0; future inserts fail because no default. Postgres' ``GENERATED BY DEFAULT AS IDENTITY`` and SQLite's ``INTEGER PRIMARY KEY`` rowid alias don't have this gap. 
Fix: extract ``_downgrade_mysql_table()`` that emits the canonical MySQL idiom — drop FKs, then a single ALTER combining ``DROP PRIMARY KEY, ADD COLUMN id INT NOT NULL AUTO_INCREMENT, ADD PRIMARY KEY (id)`` (which backfills existing rows with sequential ids and preserves AUTO_INCREMENT), restore the redundant UNIQUE on the 2 tables that originally had it, and re-add the FKs with their original names. Postgres and SQLite keep the existing portable ``batch_alter_table`` path. Raw SQL is unavoidable for the combined-ALTER form; per the constitution it's allowed for dialect-specific DDL with no SQLA equivalent, with triple-quoted strings for legibility. Verified end-to-end: upgrade → downgrade → upgrade against a fresh MySQL 8 container with INSERT-without-id sanity check showing the restored ``id`` column auto-increments correctly. Co-Authored-By: Claude Opus 4.7 (1M context) --- ...3611e32_composite_pk_association_tables.py | 108 +++++++++++++++--- 1 file changed, 94 insertions(+), 14 deletions(-) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index 8d7b2846d342..e8a77614561c 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -337,19 +337,99 @@ def downgrade() -> None: # 10+ / MySQL 8+) and ``sa.Sequence`` (with explicit nextval) both # backfill existing rows during ALTER TABLE; bare ``autoincrement=True`` # does not. ``Identity`` is the modern portable choice. 
+ conn = op.get_bind() + insp = inspect(conn) + is_mysql = conn.dialect.name == "mysql" for t in reversed(AFFECTED_TABLES): - with op.batch_alter_table(t.name) as batch_op: - batch_op.drop_constraint(f"pk_{t.name}", type_="primary") - batch_op.add_column( - sa.Column( - "id", - sa.Integer, - sa.Identity(always=False), - nullable=False, - ) - ) - batch_op.create_primary_key(f"{t.name}_pkey", ["id"]) - if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: - batch_op.create_unique_constraint( - f"uq_{t.name}_{t.fk1}_{t.fk2}", [t.fk1, t.fk2] + if is_mysql: + _downgrade_mysql_table(insp, t) + else: + with op.batch_alter_table(t.name) as batch_op: + batch_op.drop_constraint(f"pk_{t.name}", type_="primary") + batch_op.add_column( + sa.Column( + "id", + sa.Integer, + sa.Identity(always=False), + nullable=False, + ) ) + batch_op.create_primary_key(f"{t.name}_pkey", ["id"]) + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + batch_op.create_unique_constraint( + f"uq_{t.name}_{t.fk1}_{t.fk2}", [t.fk1, t.fk2] + ) + + +def _downgrade_mysql_table( + insp: sa.engine.reflection.Inspector, t: AssociationTable +) -> None: + """MySQL-specific downgrade for one table. + + Two MySQL quirks force a dialect-specific path here: + + 1. **ERROR 1553 — ``Cannot drop index 'PRIMARY': needed in a foreign + key constraint``**. InnoDB uses the composite PK index to back the + FK on the leftmost column. Dropping the PK before the FKs orphans + that backing index. PostgreSQL and SQLite create separate indexes + for FK columns and don't need this dance. We drop the FKs first + and re-add them after the structural change. + + 2. **``Identity(always=False)`` on a non-PK column add does not emit + ``AUTO_INCREMENT`` on MySQL.** SQLAlchemy 1.4 only emits + ``AUTO_INCREMENT`` when the column has both ``Identity()`` and + ``primary_key=True`` at create time. 
Our portable path adds the + column first, then creates the PK separately — which works on + Postgres (the column gets ``GENERATED BY DEFAULT AS IDENTITY``) + and SQLite (``INTEGER PRIMARY KEY`` becomes a rowid alias) but + leaves MySQL without auto-generation, so existing rows can't be + backfilled and future ``INSERT`` statements fail with + ``Field 'id' doesn't have a default value``. The combined + ``DROP PRIMARY KEY, ADD COLUMN AUTO_INCREMENT, ADD PRIMARY KEY`` + in a single ALTER statement is the canonical MySQL idiom: MySQL + backfills existing rows with sequential values and the column + remains auto-incrementing for future inserts. + + Raw SQL is unavoidable here — there is no SQLAlchemy core equivalent + for the combined-ALTER form, and the constitution allows raw SQL for + dialect-specific DDL with no programmatic equivalent (preferring + triple-quoted strings for legibility). + """ + fks = insp.get_foreign_keys(t.name) + + for fk in fks: + if fk_name := fk.get("name"): + op.execute(f"ALTER TABLE `{t.name}` DROP FOREIGN KEY `{fk_name}`") + + op.execute( + f""" + ALTER TABLE `{t.name}` + DROP PRIMARY KEY, + ADD COLUMN id INT NOT NULL AUTO_INCREMENT, + ADD PRIMARY KEY (id) + """ + ) + + if t.name in TABLES_WITH_PRE_EXISTING_UNIQUE: + op.execute( + f""" + ALTER TABLE `{t.name}` + ADD UNIQUE INDEX `uq_{t.name}_{t.fk1}_{t.fk2}` + (`{t.fk1}`, `{t.fk2}`) + """ + ) + + for fk in fks: + ondelete = fk.get("options", {}).get("ondelete") + ondelete_clause = f" ON DELETE {ondelete}" if ondelete else "" + local_cols = ", ".join(f"`{c}`" for c in fk["constrained_columns"]) + ref_cols = ", ".join(f"`{c}`" for c in fk["referred_columns"]) + op.execute( + f""" + ALTER TABLE `{t.name}` + ADD CONSTRAINT `{fk["name"]}` + FOREIGN KEY ({local_cols}) + REFERENCES `{fk["referred_table"]}` ({ref_cols}) + {ondelete_clause} + """ + ) From 7ae35b4f15c5ca338b5a469ca887e74486b4bde1 Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Tue, 5 May 2026 10:46:01 -0600 Subject: [PATCH 7/8] 
fix(migration): explicit NOT NULL on FK columns for SQLite (sc-105349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found by running fresh-install + round-trip against a real SQLite DB: 6 of the 8 affected tables had FK columns that were originally declared nullable. PostgreSQL and MySQL implicitly promote the constituent columns of an ``ALTER TABLE ... ADD PRIMARY KEY`` to ``NOT NULL``; SQLite does not (it's a long-standing SQLite quirk — columns of a composite PRIMARY KEY remain nullable unless explicitly declared NOT NULL; only a single-column rowid-alias ``INTEGER PRIMARY KEY`` implies NOT NULL). Result: a fresh SQLite install would accept ``INSERT INTO dashboard_slices VALUES (NULL, 5)`` despite both columns being part of the composite PK. Our integration tests previously masked this: the test fixture seeds columns with ``nullable=False``, so the post-upgrade NOT NULL assertion passed regardless of whether the migration enforced it. Fix: add explicit ``batch_op.alter_column(fk, nullable=False)`` for both FK columns inside the per-table batch_alter_table block. On PostgreSQL and MySQL this is a no-op (PK already implies NOT NULL); on SQLite it adds the missing NOT NULL declaration so a fresh install matches the data-model.md "After" contract. 
Verified end-to-end: - Postgres + MySQL: column shape unchanged (still NOT NULL) - SQLite fresh install + round-trip: all 8 tables now have NOT NULL on FK columns, ``INSERT (NULL, 5)`` correctly rejected with IntegrityError on dashboard_slices, dashboard_user, sqlatable_user Co-Authored-By: Claude Opus 4.7 (1M context) --- ..._2bee73611e32_composite_pk_association_tables.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index e8a77614561c..210a419d0eea 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -310,10 +310,23 @@ def upgrade() -> None: ) as batch_op: batch_op.drop_column("id") batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + # SQLite quirk: composite PRIMARY KEY does not promote the + # constituent columns to NOT NULL (only ``INTEGER PRIMARY + # KEY`` does). PostgreSQL and MySQL implicitly promote the + # PK columns to NOT NULL when the constraint is added, + # so the explicit ``alter_column`` is a no-op on those + # backends but enforces the post-upgrade contract on + # SQLite. Without it, ``INSERT (NULL, 5)`` would succeed + # on SQLite despite the columns being part of the PK. + batch_op.alter_column(t.fk1, existing_type=sa.Integer, nullable=False) + batch_op.alter_column(t.fk2, existing_type=sa.Integer, nullable=False) else: with op.batch_alter_table(t.name) as batch_op: batch_op.drop_column("id") batch_op.create_primary_key(f"pk_{t.name}", [t.fk1, t.fk2]) + # See comment above re: SQLite composite-PK NOT NULL quirk. 
+ batch_op.alter_column(t.fk1, existing_type=sa.Integer, nullable=False) + batch_op.alter_column(t.fk2, existing_type=sa.Integer, nullable=False) def downgrade() -> None: From 7b7b81c589c4508ddc2dfde68827b49167fb4cb4 Mon Sep 17 00:00:00 2001 From: Mike Bridge Date: Tue, 5 May 2026 11:07:10 -0600 Subject: [PATCH 8/8] fix(migration): rebase down_revision onto 33d7e0e21daa (sc-105349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI cypress + playwright shards were red with: ERROR [flask_migrate] Error: Multiple head revisions are present for given argument 'head' The recent rebase onto master pulled in ``33d7e0e21daa_add_semantic_layers_and_views.py`` (from PR #37815, "semantic layer extension"), which had been authored against ``ce6bd21901ab`` as its parent — the same parent our migration referenced. After the rebase both migrations point at ``ce6bd21901ab``, producing two heads and breaking ``flask db upgrade head`` for any downstream consumer (CI's Cypress / Playwright shards spin up a real Superset instance via ``superset db upgrade``, which is why those shards failed first; the integration shards run against a precomputed schema and didn't surface this). Fix: chain our migration after the semantic-layer migration by pointing ``down_revision`` at ``33d7e0e21daa``. The chain is now linear: ... → ce6bd21901ab → 33d7e0e21daa (semantic layers) → 2bee73611e32 (composite PK, this PR) Verified with ``superset db heads`` (returns single head ``2bee73611e32``) and the local migration test suite (44 passed, 1 skipped). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ...5-01_23-36_2bee73611e32_composite_pk_association_tables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py index 210a419d0eea..055ecd3c9700 100644 --- a/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py +++ b/superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py @@ -27,7 +27,7 @@ of the eight tables lacked DB-level uniqueness. Revision ID: 2bee73611e32 -Revises: ce6bd21901ab +Revises: 33d7e0e21daa Create Date: 2026-05-01 23:36:34.050058 """ @@ -42,7 +42,7 @@ # revision identifiers, used by Alembic. revision = "2bee73611e32" -down_revision = "ce6bd21901ab" +down_revision = "33d7e0e21daa" logger = logging.getLogger("alembic.env")