diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index b630e13b968c7..6479cf172bcc1 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -43,6 +43,7 @@
 import subprocess
 import sys
 import traceback
+from typing import List
 from urllib.request import urlopen
 from urllib.request import Request
 from urllib.error import HTTPError
@@ -700,77 +701,275 @@ def resolve_jira_issues(title, merge_branches, comment):
         resolve_jira_issue(merge_branches, comment, jira_id)
 
 
-def standardize_jira_ref(text):
-    """
-    Standardize the [SPARK-XXXXX] [MODULE] prefix
-    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to
-    "[SPARK-XXX][MLLIB] Issue"
-
-    >>> standardize_jira_ref(
-    ...     "[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
-    '[SPARK-5821][SQL] ParquetRelation2 CTAS should check if delete is successful'
-    >>> standardize_jira_ref(
-    ...     "[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
-    '[SPARK-4123][PROJECT INFRA][WIP] Show new dependencies added in pull requests'
-    >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
-    '[SPARK-5954][MLLIB] Top by key'
-    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
-    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
-    >>> standardize_jira_ref(
-    ...     "SPARK-1094 Support MiMa for reporting binary compatibility across versions.")
-    '[SPARK-1094] Support MiMa for reporting binary compatibility across versions.'
-    >>> standardize_jira_ref("[WIP]  [SPARK-1146] Vagrant support for Spark")
-    '[SPARK-1146][WIP] Vagrant support for Spark'
-    >>> standardize_jira_ref(
-    ...     "SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
-    '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
-    >>> standardize_jira_ref(
-    ...     "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
-    '[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.'
-    >>> standardize_jira_ref(
-    ...     'Revert "[SPARK-48591][PYTHON] Simplify the if-else branches with F.lit"')
-    'Revert "[SPARK-48591][PYTHON] Simplify the if-else branches with F.lit"'
-    >>> standardize_jira_ref("Additional information for users building from source code")
-    'Additional information for users building from source code'
+
+class Component:
+    """A Spark PR-title tag, paired with its canonical JIRA component name.
+
+    ``jira_name`` is the canonical name of the SPARK JIRA component (e.g.
+    "Documentation"); empty for status markers like [MINOR] that are not
+    JIRA components but are still recognized in PR titles.
+
+    ``tag`` is the preferred PR-title abbreviation (uppercase, no brackets,
+    e.g. "DOC"). ``aliases`` lists other accepted spellings that resolve to
+    the same component (e.g. "DOCS", "DOCUMENTATION" -> "DOC").
+
+    ``primary`` marks components whose presence alone satisfies the merge-time
+    requirement. Non-primary JIRA components (e.g. [TEST], [PS], [SHUFFLE])
+    remain recognized — they normalize and pass through validation — but
+    they must be paired with a primary tag (e.g. [SQL][TEST]). Status
+    markers are never primary. [WIP] is intentionally absent from the
+    registry: a WIP PR should be aborted at the earlier WIP warning, not
+    merged.
     """
-    jira_refs = []
-    components = []
 
-    # If this is a Revert PR, no need to process any further
-    if text.startswith('Revert "') and text.endswith('"'):
-        return text
+    def __init__(self, tag, aliases=(), primary=False, jira_name=""):
+        self.tag = tag
+        self.aliases = frozenset(aliases)
+        self.primary = primary
+        self.jira_name = jira_name
 
-    # If the string is compliant, no need to process any further
-    if re.search(r"^\[SPARK-[0-9]{3,6}\](\[[A-Z0-9_\s,]+\] )+\S+", text):
-        return text
+    def matches(self, token):
+        return token == self.tag or token in self.aliases
 
-    # Extract JIRA ref(s):
-    pattern = re.compile(r"(SPARK[-\s]*[0-9]{3,6})+", re.IGNORECASE)
-    for ref in pattern.findall(text):
-        # Add brackets, replace spaces with a dash, & convert to uppercase
-        jira_refs.append("[" + re.sub(r"\s+", "-", ref.upper()) + "]")
-        text = text.replace(ref, "")
+    @classmethod
+    def find(cls, token):
+        """Return the Component matching ``token`` (case-insensitive), or None."""
+        if token is None:
+            return None
+        token = token.strip().upper()
+        for c in COMPONENTS:
+            if c.matches(token):
+                return c
+        return None
 
-    # Extract spark component(s):
-    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
-    pattern = re.compile(r"(\[[\w\s,.-]+\])", re.IGNORECASE)
-    for component in pattern.findall(text):
-        components.append(component.upper())
-        text = text.replace(component, "")
 
-    # Cleanup any remaining symbols:
-    pattern = re.compile(r"^\W+(.*)", re.IGNORECASE)
-    if pattern.search(text) is not None:
-        text = pattern.search(text).groups()[0]
 
-    # Assemble full text (JIRA ref(s), module(s), remaining text)
-    clean_text = "".join(jira_refs).strip() + "".join(components).strip() + " " + text.strip()
+# Full SPARK JIRA component list (sorted alphabetically by tag), followed
+# by status markers. Keep in sync with the components in JIRA — fetch the
+# current list with:
+#   curl -s https://issues.apache.org/jira/rest/api/2/project/SPARK/components
+# A `primary=True` marker indicates the tag alone satisfies the merge-time
+# component requirement; non-primary JIRA components must be paired with a
+# primary one (e.g. [SQL][TEST], [PYTHON][PS], [CORE][SHUFFLE]). Status
+# markers leave `jira_name` empty.
+COMPONENTS = (
+    Component("BLOCK MANAGER", ("BLOCK_MANAGER",), jira_name="Block Manager"),
+    Component("BUILD", primary=True, jira_name="Build"),
+    Component("CONNECT", primary=True, jira_name="Connect"),
+    Component("CORE", ("SPARK CORE", "SPARK_CORE"), primary=True, jira_name="Spark Core"),
+    Component("DEPLOY", jira_name="Deploy"),
+    Component("DOC", ("DOCS", "DOCUMENTATION"), primary=True, jira_name="Documentation"),
+    Component("DOCKER", primary=True, jira_name="Spark Docker"),
+    Component("EC2", jira_name="EC2"),
+    Component("EXAMPLE", ("EXAMPLES",), jira_name="Examples"),
+    Component("GRAPHX", primary=True, jira_name="GraphX"),
+    Component("INFRA", ("PROJECT INFRA", "PROJECT_INFRA"), primary=True, jira_name="Project Infra"),
+    Component("IO", jira_name="Input/Output"),
+    Component("JAVA", ("JAVA API", "JAVA_API", "JAVAAPI"), jira_name="Java API"),
+    Component("K8S", ("KUBERNETES",), primary=True, jira_name="Kubernetes"),
+    Component("MESOS", jira_name="Mesos"),
+    Component("ML", primary=True, jira_name="ML"),
+    Component("MLLIB", primary=True, jira_name="MLlib"),
+    Component("OPTIMIZER", jira_name="Optimizer"),
+    Component("PROTOBUF", jira_name="Protobuf"),
+    Component("PS", primary=True, jira_name="Pandas API on Spark"),
+    Component("PYTHON", ("PYSPARK",), primary=True, jira_name="PySpark"),
+    Component("R", ("SPARKR",), primary=True, jira_name="R"),
+    Component("REPL", ("SHELL", "SPARK SHELL", "SPARK_SHELL"), jira_name="Spark Shell"),
+    Component("SCHEDULER", jira_name="Scheduler"),
+    Component("SDP", ("PIPELINES",), primary=True, jira_name="Declarative Pipelines"),
+    Component("SECURITY", primary=True, jira_name="Security"),
+    Component("SHUFFLE", jira_name="Shuffle"),
+    # Component("SPARKR", jira_name="SparkR"),  # SPARKR is treated as an alias of R above
+    Component("SQL", primary=True, jira_name="SQL"),
+    Component("SS", primary=True, jira_name="Structured Streaming"),
+    Component("STREAMING", ("DSTREAM", "DSTREAMS"), primary=True, jira_name="DStreams"),
+    Component("SUBMIT", jira_name="Spark Submit"),
+    Component("TEST", ("TESTS", "TEST-ONLY", "TESTS-ONLY"), jira_name="Tests"),
+    Component("UI", ("WEBUI", "WEB UI", "WEB_UI"), primary=True, jira_name="Web UI"),
+    Component("WINDOWS", primary=True, jira_name="Windows"),
+    Component("YARN", primary=True, jira_name="YARN"),
+    # Status markers — recognized in PR titles, but not JIRA components.
+    Component("FOLLOWUP", ("FOLLOW-UP", "FOLLOW UP")),
+    Component("MINOR"),
+    Component("TRIVIAL"),
+)
+
+
+_BRACKET_TAG_RE = re.compile(r"\[\s*([A-Za-z0-9._-]+)\s*\]")
+_SPARK_ID_RE = re.compile(r"^SPARK-\d+$", re.IGNORECASE)
+_VERSION_TAG_RE = re.compile(r"^\d+\.(\d+|X)$")
+_LEADING_TAGS = frozenset({"MINOR", "TRIVIAL"})
+
+
+class Title:
+    """Structured PR title: SPARK refs, component tags, and body.
+
+    ``leading``    — SPARK-NNNNN IDs and [MINOR]/[TRIVIAL] markers, in order.
+    ``components`` — all other bracket tags, in order.
+    ``text``       — body text following the bracket sequence.
+
+    >>> t = Title.parse("[SPARK-1234][SQL] Fix something")
+    >>> t.leading, t.components, t.text
+    (['SPARK-1234'], ['SQL'], 'Fix something')
+    >>> str(t)
+    '[SPARK-1234][SQL] Fix something'
+    >>> t = Title.parse("[SPARK-1234][SQL][FOLLOWUP] Fix something")
+    >>> t.leading, t.components, t.text
+    (['SPARK-1234'], ['SQL', 'FOLLOWUP'], 'Fix something')
+    >>> str(t)
+    '[SPARK-1234][SQL][FOLLOWUP] Fix something'
+    """
 
-    # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were
-    # included
-    clean_text = re.sub(r"\s+", " ", clean_text.strip())
+    def __init__(
+        self,
+        leading: List[str],
+        components: List[str],
+        text: str,
+    ) -> None:
+        self.leading = leading
+        self.components = components
+        self.text = text
+
+    @classmethod
+    def parse(cls, raw: str) -> "Title":
+        """Parse a PR title string into a :class:`Title`.
+
+        A title must open with a leading tag ([SPARK-NNNNN], [MINOR], or
+        [TRIVIAL]); otherwise :exc:`ValueError` is raised.  Subsequent bracket
+        tokens (spaces trimmed, separated by optional whitespace) go to
+        ``components``.  The remainder is ``text``.
+
+        >>> t = Title.parse("[SPARK-1234][SQL][TESTS] Fix something")
+        >>> t.leading, t.components, t.text
+        (['SPARK-1234'], ['SQL', 'TESTS'], 'Fix something')
+        >>> t = Title.parse("  [ SPARK-1234 ]  [ SQL ] [  TESTS  ]   Fix something")
+        >>> t.leading, t.components, t.text
+        (['SPARK-1234'], ['SQL', 'TESTS'], 'Fix something')
+        >>> t = Title.parse("[SPARK-1234 ][ sql ][ followup ] Fix")
+        >>> t.leading, t.components, t.text
+        (['SPARK-1234'], ['SQL', 'FOLLOWUP'], 'Fix')
+        >>> str(t)
+        '[SPARK-1234][SQL][FOLLOWUP] Fix'
+        >>> Title.parse("[MINOR] Fix typo").leading
+        ['MINOR']
+        >>> t = Title.parse("[spark-1234][sql][followup] Fix")
+        >>> t.leading, t.components
+        (['SPARK-1234'], ['SQL', 'FOLLOWUP'])
+        >>> Title.parse("[SPARK-1234][SPARK-5678][SQL] Fix").leading
+        ['SPARK-1234', 'SPARK-5678']
+        >>> Title.parse("[SPARK-1234][4.X][SQL] Fix").components
+        ['4.X', 'SQL']
+        >>> Title.parse("[SPARK-1234][SQL][4.2] Fix").components
+        ['SQL', '4.2']
+        >>> Title.parse("[SQL] Fix")
+        Traceback (most recent call last):
+            ...
+        ValueError: title must start with [SPARK-NNNNN], [MINOR], or [TRIVIAL]: '[SQL] Fix'
+        >>> Title.parse("No brackets")
+        Traceback (most recent call last):
+            ...
+        ValueError: title must start with [SPARK-NNNNN], [MINOR], or [TRIVIAL]: 'No brackets'
+        >>> Title.parse("[SPARK-1234][SQL][SPARK-123] Fix")
+        Traceback (most recent call last):
+            ...
+        ValueError: [SPARK-NNNNN] tags must all appear before other tags: '[SPARK-1234][SQL][SPARK-123] Fix'
+        >>> Title.parse("[SPARK-1234][MINOR][SQL] Fix")
+        Traceback (most recent call last):
+            ...
+        ValueError: [SPARK-NNNNN], [MINOR], and [TRIVIAL] cannot coexist
+        >>> Title.parse("[MINOR][TRIVIAL][SQL] Fix")
+        Traceback (most recent call last):
+            ...
+        ValueError: [SPARK-NNNNN], [MINOR], and [TRIVIAL] cannot coexist
+        """
+        leading: List[str] = []
+        components: List[str] = []
+
+        raw = raw.strip()
+        m0 = _BRACKET_TAG_RE.match(raw)
+        first = m0.group(1).upper() if m0 else ""
+        if not (_SPARK_ID_RE.match(first) or first in _LEADING_TAGS):
+            raise ValueError(
+                "title must start with [SPARK-NNNNN], [MINOR], or [TRIVIAL]: %r" % raw
+            )
 
-    return clean_text
+        past_leading = False
+        pos = 0
+        while pos < len(raw):
+            m = _BRACKET_TAG_RE.match(raw, pos)
+            if not m:
+                break
+            tag = m.group(1).upper()
+            if _SPARK_ID_RE.match(tag):
+                if past_leading:
+                    raise ValueError(
+                        "[SPARK-NNNNN] tags must all appear before other tags: %r" % raw
+                    )
+                leading.append(tag)
+            elif tag in _LEADING_TAGS:
+                leading.append(tag)
+            else:
+                components.append(tag)
+                past_leading = True
+            pos = m.end()
+            while pos < len(raw) and raw[pos] == " ":
+                pos += 1
+
+        text = raw[pos:].lstrip()
+        markers = [t for t in leading if t in _LEADING_TAGS]
+        if len(markers) > 1 or (markers and len(leading) > len(markers)):
+            raise ValueError(
+                "[SPARK-NNNNN], [MINOR], and [TRIVIAL] cannot coexist"
+            )
+        return cls(leading, components, text)
+
+    def __str__(self) -> str:
+        parts = "".join("[%s]" % t for t in self.leading)
+        parts += "".join("[%s]" % c for c in self.components)
+        if not self.text:
+            return parts
+        return parts + (" " if parts else "") + self.text
+
+
+def prompt_for_components():
+    """
+    Prompt the committer for component(s) when the PR title lacks a primary
+    component. Each entered token is normalized via Component.find
+    (e.g. "DOCS" -> "DOC", "PYSPARK" -> "PYTHON"). Unrecognized tokens are
+    passed through as-is. Re-prompts until at least one entered token resolves
+    to a tag in PRIMARY_COMPONENTS. Returns an uppercase list of tags in
+    insertion order.
+    """
+    print("PR title is missing a primary [COMPONENT] tag.")
+    print("Primary components (one of these is required):")
+    primary = [c for c in COMPONENTS if c.primary]
+    width = max(len(c.tag) for c in primary)
+    for c in primary:
+        print("  [%s]%s - %s" % (c.tag, " " * (width - len(c.tag)), c.jira_name))
+    while True:
+        raw = bold_input(
+            "Enter comma-separated component(s) to insert into the title (e.g. CORE,SQL): "
+        )
+        components = []
+        has_primary = False
+        for token in raw.split(","):
+            t = token.strip().upper()
+            if t:
+                c = Component.find(t)
+                if c is not None and c.primary:
+                    has_primary = True
+                components.append(c.tag if c else t)
+        if not components:
+            print_error("Component(s) cannot be empty. Please enter at least one.")
+            continue
+        if not has_primary:
+            print_error(
+                "At least one component must be a primary tag (see list above). "
+                "Got: %s" % ", ".join(components)
+            )
+            continue
+        return components
 
 
 def get_current_ref():
@@ -843,28 +1042,65 @@ def main():
     pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
 
     url = pr["url"]
+    title = pr["title"]
 
-    # Warn if the PR is WIP
-    if "[WIP]" in pr["title"]:
-        msg = "The PR title has `[WIP]`:\n%s\nContinue?" % pr["title"]
-        continue_maybe(msg)
+    # Fail hard on WIP or DO-NOT-MERGE to prevent accidental merges.
+    if "[WIP]" in title or "[DO-NOT-MERGE]" in title:
+        fail("Cannot merge a PR with [WIP] or [DO-NOT-MERGE] in the title:\n%s" % title)
 
-    # Decide whether to use the modified title or not
-    modified_title = standardize_jira_ref(pr["title"]).rstrip(".")
-    if modified_title != pr["title"]:
-        print("I've re-written the title as follows to match the standard format:")
-        print("Original: %s" % pr["title"])
-        print("Modified: %s" % modified_title)
-        result = bold_input("Would you like to use the modified title? (y/N): ")
-        if result.lower() == "y":
-            title = modified_title
-            print("Using modified title:")
-        else:
-            title = pr["title"]
-            print("Using original title:")
-        print(title)
-    else:
-        title = pr["title"]
+    # e.g. 'Revert "[SPARK-56357][BUILD] Upgrade sbt to 1.12.8"'
+    is_revert_pr = title.startswith('Revert "') and title.endswith('"')
+    # e.g. 'Reapply "[SPARK-56357][BUILD] Upgrade sbt to 1.12.8"'
+    is_reapply_pr = title.startswith('Reapply "') and title.endswith('"')
+
+    # Revert and Reapply PRs keep their title verbatim.
+    if not (is_revert_pr or is_reapply_pr):
+        # Parse; fail on a malformed title.
+        try:
+            parsed = Title.parse(title)
+        except ValueError as e:
+            fail("Malformed PR title: %s" % e)
+
+        # Normalize component tags via the registry and track primary.
+        components = []
+        has_primary = False
+        for tag in parsed.components:
+            c = Component.find(tag)
+            if c is not None and c.primary:
+                has_primary = True
+            components.append(c.tag if c is not None else tag)
+        if not has_primary:
+            new_tags = prompt_for_components()
+            components = new_tags + components
+
+        # Deduplicate tags in insertion order.
+        components = list(dict.fromkeys(components))
+
+        # Move version tags (e.g. [4.X], [4.2]) to the head of components.
+        versions = [t for t in components if _VERSION_TAG_RE.match(t)]
+        if versions:
+            others = [t for t in components if not _VERSION_TAG_RE.match(t)]
+            components = versions + others
+
+        # Move FOLLOWUP to the last tag.
+        non_followup = [t for t in components if t != "FOLLOWUP"]
+        if len(non_followup) < len(components):
+            components = non_followup + ["FOLLOWUP"]
+
+        # Warn about tags that are neither known components nor version tags.
+        unknown = [
+            t for t in components
+            if Component.find(t) is None and not _VERSION_TAG_RE.match(t)
+        ]
+        if unknown:
+            print_error(
+                "Title has unknown tag(s): %s" % ", ".join("[%s]" % t for t in unknown)
+            )
+
+        parsed.components = components
+        title = str(parsed)
+        if title != pr["title"]:
+            print("Normalized title: %s" % title)
 
     body = pr["body"]
     if body is None: