From 59ecb6a24f2016fc85fe1594136f9181b326fc2d Mon Sep 17 00:00:00 2001
From: Chen Li <chenlii@fb.com>
Date: Sun, 14 Jun 2026 19:43:45 -0700
Subject: [PATCH 1/2] [LLDB][AMDGPU] Refine core comparison framework

---
 .../amd/TestAmdGpuCoreFileComparison.py       | 221 +++++++++++++++++-
 .../gpu/comparison/framework/comparator.py    |  91 +++++++-
 .../framework/debugger_interface.py           |   5 +-
 .../gpu/comparison/framework/gdb_driver.py    |  34 ++-
 .../gpu/comparison/framework/lldb_driver.py   |  65 +++++-
 5 files changed, 400 insertions(+), 16 deletions(-)
diff --git a/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py b/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
index 9ae4f38d8cd5d..398a688181777 100644
--- a/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
+++ b/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
@@ -13,6 +13,11 @@
     test_thread_count__<basename>
     test_registers__<basename>
     test_local_variables__<basename>
+    test_modules__<basename>     (combined module list)
+    test_backtrace__<basename>   (faulting GPU wave backtrace)
+
+  Each comparison logs the data gathered from both debuggers (module lists,
+  backtraces) and any differences via self.trace(), visible in the dotest log.
 
 ARCHITECTURAL DIFFERENCE:
 - LLDB: Creates TWO targets (CPU + GPU). Must use `target select` to switch
@@ -43,6 +48,7 @@
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
 
 from framework.comparator import ResultComparator
+from framework.debugger_interface import DebuggerResult
 from framework.gdb_driver import GdbDriver
 from framework.lldb_driver import LldbDriver
 
@@ -54,10 +60,14 @@ def _get_default_core_dir():
 
 
 def _get_rocgdb_path():
-    """Get ROCgdb path by looking in PATH.
-    TODO: make this configurable via lit configuration.
+    """Get the ROCgdb binary.
+
+    Prefer GPU_COMPARISON_ROCGDB (the eval script points this at the
+    platform010 ROCm-7.0 rocgdb, which can open ROCm7 GPU cores; the system
+    /usr/bin/rocgdb fails to initialize the ROCm debug API on them). Fall
+    back to PATH.
     """
-    return shutil.which("rocgdb")
+    return os.environ.get("GPU_COMPARISON_ROCGDB") or shutil.which("rocgdb")
 
 
 def _get_core_files():
@@ -89,7 +99,7 @@ class TestAmdGpuCoreFileComparison(TestBase):
     # Per-core-file setup / teardown helpers
     # ------------------------------------------------------------------
 
-    def _load_core(self, core_path):
+    def _load_core(self, core_path, auto_load_debuginfo=False):
         """Load a core file in both debuggers and return (gdb_driver, lldb_driver, comparator)."""
         rocgdb_path = _get_rocgdb_path()
         if not rocgdb_path:
@@ -119,8 +129,18 @@ def _load_core(self, core_path):
             pc_tolerance=0,
         )
 
-        gdb_driver.load_core(core_path)
-        lldb_driver.load_core(core_path)
+        # Module and backtrace comparisons request auto-load-debuginfo; the
+        # other comparisons keep the cheaper plain core load.
+        gdb_result = gdb_driver.load_core(
+            core_path, auto_load_debuginfo=auto_load_debuginfo
+        )
+        lldb_result = lldb_driver.load_core(
+            core_path, auto_load_debuginfo=auto_load_debuginfo
+        )
+        for label, result in (("ROCgdb", gdb_result), ("ROCLLDB", lldb_result)):
+            auto_load_output = result.extra_data.get("auto_load_output")
+            if auto_load_output:
+                self.trace(f"{label} auto-load-debuginfo:\n{auto_load_output}")
 
         # Store for cleanup in tearDown
         self._active_gdb_driver = gdb_driver
@@ -379,6 +399,169 @@ def _run_local_variables_comparison(self, core_path):
             ),
         )
 
+    def _run_module_comparison(self, core_path):
+        """Compare combined module lists for a core file.
+
+        ROCgdb reports modules as one flat objfile list. ROCLLDB keeps CPU and
+        GPU modules on separate targets, so gather both targets into one result
+        before comparing. This avoids depending on debugger-specific host/device
+        classification.
+        """
+        gdb_driver, lldb_driver, comparator = self._load_core(
+            core_path, auto_load_debuginfo=True
+        )
+
+        gdb_result = gdb_driver.get_modules()
+        self.assertTrue(
+            gdb_result.success,
+            f"GDB failed to list modules: {gdb_result.error_message}",
+        )
+
+        lldb_result = self._get_lldb_combined_modules(lldb_driver)
+        self.assertTrue(
+            lldb_result.success,
+            f"LLDB failed to list modules: {lldb_result.error_message}",
+        )
+
+        comparison = comparator.compare_modules(gdb_result, lldb_result)
+
+        def fmt(mod):
+            return f"{mod.name} uuid={mod.uuid or '?'}"
+
+        self.trace("\n=== Module comparison ===")
+        self.trace(f"GDB modules: {len(gdb_result.modules)}")
+        self.trace(f"LLDB modules: {len(lldb_result.modules)}")
+        for mod in gdb_result.modules:
+            self.trace(f"  GDB: {fmt(mod)}")
+        for mod in lldb_result.modules:
+            self.trace(f"  LLDB: {fmt(mod)}")
+
+        if not gdb_result.modules and not lldb_result.modules:
+            self.skipTest(
+                "No modules reported by either debugger "
+                "(executable/debug info unavailable for this core?)"
+            )
+
+        differences = comparison.differences
+        gdb_only_modules = comparison.gdb_only.get("modules", [])
+        lldb_only_modules = comparison.lldb_only.get("modules", [])
+
+        # These keys have already been normalized by the comparator, so one-sided
+        # entries here are actionable comparison differences.
+        if gdb_only_modules:
+            self.trace(
+                f"  GDB-only normalized module keys ({len(gdb_only_modules)}): "
+                + ", ".join(str(m) for m in gdb_only_modules[:10])
+            )
+        if lldb_only_modules:
+            self.trace(
+                f"  LLDB-only normalized module keys ({len(lldb_only_modules)}): "
+                + ", ".join(str(m) for m in lldb_only_modules[:10])
+            )
+
+        failure_lines = []
+        if differences:
+            failure_lines.append(f"Module differences: {len(differences)}; first 10:")
+            for diff in differences[:10]:
+                self.trace(f"  {diff.description}")
+                failure_lines.append(f"  {diff.description}")
+
+        if failure_lines:
+            self.fail("Module comparison failed:\n" + "\n".join(failure_lines))
+
+    def _get_lldb_combined_modules(self, lldb_driver):
+        modules = []
+        errors = []
+
+        for target_name, select in (
+            ("CPU", lldb_driver.select_cpu),
+            ("GPU", lldb_driver.select_gpu),
+        ):
+            select_result = select()
+            if not select_result.success:
+                self.trace(
+                    f"\nLLDB {target_name} target modules: skipped "
+                    f"({select_result.error_message})"
+                )
+                continue
+
+            result = lldb_driver.get_modules()
+            if not result.success:
+                errors.append(
+                    f"LLDB failed to list {target_name} target modules: "
+                    f"{result.error_message}"
+                )
+                continue
+
+            self.trace(f"\nLLDB {target_name} target modules: {len(result.modules)}")
+            modules.extend(result.modules)
+
+        if errors:
+            return DebuggerResult(success=False, error_message="\n".join(errors))
+        return DebuggerResult(success=True, modules=modules)
+
+    def _run_backtrace_comparison(self, core_path):
+        """Compare the faulting GPU wave's backtrace between debuggers.
+
+        Both debuggers select the faulting wave by default when loading a
+        core; rely on that selection. IMPORTANT: Do NOT call
+        get_all_threads() first as it changes GDB's selected thread!
+
+        PCs must match unconditionally; function names are compared only
+        for frames both debuggers symbolized (production cores are often
+        unsymbolized unless debug info was loaded).
+        """
+        gdb_driver, lldb_driver, comparator = self._load_core(
+            core_path, auto_load_debuginfo=True
+        )
+
+        gdb_result = gdb_driver.get_backtrace()
+        self.assertTrue(
+            gdb_result.success,
+            f"GDB failed to get backtrace: {gdb_result.error_message}",
+        )
+
+        select_result = lldb_driver.select_gpu()
+        if not select_result.success:
+            self.skipTest(select_result.error_message)
+        if lldb_driver.get_thread_count() == 0:
+            self.skipTest("No GPU threads in LLDB")
+
+        lldb_result = lldb_driver.get_backtrace()
+        self.assertTrue(
+            lldb_result.success,
+            f"LLDB failed to get backtrace: {lldb_result.error_message}",
+        )
+
+        def fmt(frame):
+            location = f" at {frame.file}:{frame.line}" if frame.file else ""
+            return f"#{frame.index} {hex(frame.pc)} {frame.function}{location}"
+
+        self.trace("\n=== GPU backtrace comparison (faulting wave) ===")
+        self.trace(f"GDB frames: {len(gdb_result.backtrace)}")
+        self.trace(f"LLDB frames: {len(lldb_result.backtrace)}")
+        for frame in gdb_result.backtrace[:10]:
+            self.trace(f"  GDB: {fmt(frame)}")
+        for frame in lldb_result.backtrace[:10]:
+            self.trace(f"  LLDB: {fmt(frame)}")
+
+        comparison = comparator.compare_backtrace(gdb_result, lldb_result)
+
+        # Empty on both sides is no coverage, not parity (see module test).
+        if not gdb_result.backtrace and not lldb_result.backtrace:
+            self.skipTest("Neither debugger produced any backtrace frames")
+
+        if comparison.differences:
+            failure_lines = [
+                f"Backtrace differences: {len(comparison.differences)}; first 10:"
+            ]
+            for diff in comparison.differences[:10]:
+                self.trace(f"  {diff.description}")
+                failure_lines.append(f"  {diff.description}")
+            self.fail(
+                "GPU backtrace comparison failed:\n" + "\n".join(failure_lines)
+            )
+
     # ------------------------------------------------------------------
     # Placeholder when no core files are available
     # ------------------------------------------------------------------
@@ -436,6 +619,22 @@ def test(self):
             test.__doc__ = f"Local variables comparison for {os.path.basename(cp)}"
             return test
 
+        def make_modules_test(cp=core_path):
+            @skipUnlessArch("x86_64")
+            @skipUnlessPlatform(["linux"])
+            def test(self):
+                self._run_module_comparison(cp)
+            test.__doc__ = f"Module comparison for {os.path.basename(cp)}"
+            return test
+
+        def make_backtrace_test(cp=core_path):
+            @skipUnlessArch("x86_64")
+            @skipUnlessPlatform(["linux"])
+            def test(self):
+                self._run_backtrace_comparison(cp)
+            test.__doc__ = f"GPU backtrace comparison for {os.path.basename(cp)}"
+            return test
+
         setattr(
             TestAmdGpuCoreFileComparison,
             f"test_thread_count__{basename}",
@@ -451,6 +650,16 @@ def test(self):
             f"test_local_variables__{basename}",
             make_local_variables_test(),
         )
+        setattr(
+            TestAmdGpuCoreFileComparison,
+            f"test_modules__{basename}",
+            make_modules_test(),
+        )
+        setattr(
+            TestAmdGpuCoreFileComparison,
+            f"test_backtrace__{basename}",
+            make_backtrace_test(),
+        )
 
 
 _add_core_file_tests()
diff --git a/lldb/test/API/gpu/comparison/framework/comparator.py b/lldb/test/API/gpu/comparison/framework/comparator.py
index 9426cbd08dc4c..c8ad889a7bd4a 100644
--- a/lldb/test/API/gpu/comparison/framework/comparator.py
+++ b/lldb/test/API/gpu/comparison/framework/comparator.py
@@ -2,7 +2,10 @@
 Result comparator for comparing GDB and LLDB debugging outputs.
 """
 
+from collections import Counter
 from dataclasses import dataclass, field
+import os
+import re
 from typing import List, Dict, Any, Optional, Set, Tuple
 from .debugger_interface import (
     DebuggerResult,
@@ -321,26 +324,102 @@ def compare_modules(
         """Compare loaded modules from GDB and LLDB."""
         result = ComparisonResult()
 
-        gdb_modules = {m.name: m for m in gdb_result.modules}
-        lldb_modules = {m.name: m for m in lldb_result.modules}
+        gdb_modules = Counter(
+            key
+            for m in gdb_result.modules
+            if (key := self._normalize_module_key(m)) is not None
+        )
+        lldb_modules = Counter(
+            key
+            for m in lldb_result.modules
+            if (key := self._normalize_module_key(m)) is not None
+        )
 
         all_names = set(gdb_modules.keys()) | set(lldb_modules.keys())
 
         for name in sorted(all_names):
-            gdb_mod = gdb_modules.get(name)
-            lldb_mod = lldb_modules.get(name)
+            gdb_count = gdb_modules.get(name, 0)
+            lldb_count = lldb_modules.get(name, 0)
 
-            if gdb_mod is None:
+            if gdb_count == 0:
                 result.add_lldb_only("modules", name)
+                result.add_difference(
+                    "modules",
+                    name,
+                    0,
+                    lldb_count,
+                    f"Module '{name}' only in LLDB",
+                )
                 continue
 
-            if lldb_mod is None:
+            if lldb_count == 0:
                 result.add_gdb_only("modules", name)
+                result.add_difference(
+                    "modules",
+                    name,
+                    gdb_count,
+                    0,
+                    f"Module '{name}' only in GDB",
+                )
                 continue
 
+            if gdb_count != lldb_count:
+                result.add_difference(
+                    "modules",
+                    name,
+                    gdb_count,
+                    lldb_count,
+                    f"Module '{name}' count differs: GDB={gdb_count}, LLDB={lldb_count}",
+                )
+
         result.summary = result.get_summary()
         return result
 
+    def _normalize_module_key(self, module: ModuleInfo) -> str | None:
+        """Normalize debugger-specific module names into comparable keys."""
+        raw_path = module.path or ""
+        name = module.name or os.path.basename(raw_path) or "<unknown>"
+
+        # GDB exposes .gnu_debugdata as a synthetic objfile. It is debug info
+        # for the following module, not a separately loaded module.
+        if raw_path.startswith(".gnu_debugdata for "):
+            return None
+
+        if name == "[vdso]" or raw_path.startswith("system-supplied DSO"):
+            return "vdso"
+
+        # ROCgdb memory code objects look like:
+        #   4377#offset=0x7c...&size=43392
+        #   memory://4377#offset=0x7c...&size=43392
+        offset_match = re.search(
+            r"#offset=(0x[0-9a-fA-F]+|\d+)&size=(0x[0-9a-fA-F]+|\d+)",
+            raw_path or name,
+        )
+        if offset_match and (
+            (raw_path or name).startswith("memory://")
+            or (name.split("#", 1)[0].isdigit())
+        ):
+            start = int(offset_match.group(1), 0)
+            size = int(offset_match.group(2), 0)
+            return f"memory:{start:x}-{start + size:x}"
+
+        # ROCLLDB memory code objects look like:
+        #   amd_memory_kernel[0x7c..., 0x7c...)
+        kernel_match = re.search(
+            r"amd_memory_kernel\[(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)\)",
+            name,
+        )
+        if kernel_match:
+            start = int(kernel_match.group(1), 16)
+            end = int(kernel_match.group(2), 16)
+            return f"memory:{start:x}-{end:x}"
+
+        # File-backed GPU code objects may have #offset/#size suffixes on one
+        # side. Keep the backing file basename as the module identity.
+        path = (raw_path or name).removeprefix("file://")
+        path = path.split("#offset=", 1)[0]
+        return os.path.basename(path) or name
+
     def _normalize_function_name(self, name: str) -> str:
         """Normalize function name for comparison."""
         if not name:
diff --git a/lldb/test/API/gpu/comparison/framework/debugger_interface.py b/lldb/test/API/gpu/comparison/framework/debugger_interface.py
index ba885ce93ab5a..7036710bafdd2 100644
--- a/lldb/test/API/gpu/comparison/framework/debugger_interface.py
+++ b/lldb/test/API/gpu/comparison/framework/debugger_interface.py
@@ -134,7 +134,10 @@ class DebuggerInterface(ABC):
 
     @abstractmethod
     def load_core(
-        self, core_path: str, executable_path: Optional[str] = None
+        self,
+        core_path: str,
+        executable_path: Optional[str] = None,
+        auto_load_debuginfo: bool = False,
     ) -> DebuggerResult:
         """Load a core file for debugging."""
         pass
diff --git a/lldb/test/API/gpu/comparison/framework/gdb_driver.py b/lldb/test/API/gpu/comparison/framework/gdb_driver.py
index 7df9ee9de2d1e..a34f328405609 100644
--- a/lldb/test/API/gpu/comparison/framework/gdb_driver.py
+++ b/lldb/test/API/gpu/comparison/framework/gdb_driver.py
@@ -270,11 +270,17 @@ def _send_command(self, command: str, timeout: float = 60.0) -> str:
         return self._wait_for_prompt(timeout)
 
     def load_core(
-        self, core_path: str, executable_path: Optional[str] = None
+        self,
+        core_path: str,
+        executable_path: Optional[str] = None,
+        auto_load_debuginfo: bool = False,
     ) -> DebuggerResult:
         """Load a core file into the persistent GDB process."""
         self._start_gdb()
 
+        if auto_load_debuginfo:
+            core_path = os.path.realpath(core_path)
+
         self._core_path = core_path
         self._executable_path = executable_path
 
@@ -285,6 +291,9 @@ def load_core(
         # Load the core file
         self._send_command(f"core-file {core_path}")
         self._core_loaded = True
+        auto_load_output = ""
+        if auto_load_debuginfo:
+            auto_load_output = self._auto_load_debuginfo()
 
         # Get thread info to verify core loaded
         script = """
@@ -309,8 +318,29 @@ def load_core(
             success=data.get("success", False),
             error_message=data.get("error", ""),
             raw_output=data.get("raw_output", ""),
-            extra_data={"thread_count": data.get("thread_count", 0)},
+            extra_data={
+                "thread_count": data.get("thread_count", 0),
+                "auto_load_output": auto_load_output,
+            },
+        )
+
+    def _auto_load_debuginfo(self) -> str:
+        fbcode_path = os.environ.get("GPU_COMPARISON_FBCODE_PATH")
+        if not fbcode_path:
+            return "GPU_COMPARISON_FBCODE_PATH is not set; skipped auto-load-debuginfo\n"
+
+        fbload_path = os.path.join(fbcode_path, "gdb", "scripts", "fbload.py")
+        output = []
+        output.append(self._send_command(f"source {fbload_path}"))
+        output.append(self._send_command("fbload auto_debuginfo"))
+        output.append(
+            self._send_command(
+                "python import os; os.environ['LD_LIBRARY_PATH'] = "
+                "os.environ.get('LLDB_SYMBOL_STORAGE_LD_LIBRARY_PATH', '')"
+            )
         )
+        output.append(self._send_command("auto-load-debuginfo", timeout=600.0))
+        return "".join(output)
 
     def get_all_threads(self) -> DebuggerResult:
         """Get list of all threads (CPU + GPU in flat view).
diff --git a/lldb/test/API/gpu/comparison/framework/lldb_driver.py b/lldb/test/API/gpu/comparison/framework/lldb_driver.py
index db82a345de132..f69a8ed4ae25e 100644
--- a/lldb/test/API/gpu/comparison/framework/lldb_driver.py
+++ b/lldb/test/API/gpu/comparison/framework/lldb_driver.py
@@ -6,6 +6,8 @@
 direct access to LLDB's SB API.
 """
 
+import os
+
 import lldb
 from typing import List, Optional, Dict, Any
 
@@ -128,9 +130,15 @@ def get_thread_count(self) -> int:
         return self._process.GetNumThreads()
 
     def load_core(
-        self, core_path: str, executable_path: Optional[str] = None
+        self,
+        core_path: str,
+        executable_path: Optional[str] = None,
+        auto_load_debuginfo: bool = False,
     ) -> DebuggerResult:
         """Load a core file using the in-process LLDB API."""
+        if auto_load_debuginfo:
+            return self._load_core_auto_debuginfo(core_path)
+
         self._core_path = core_path
 
         try:
@@ -185,6 +193,61 @@ def load_core(
         except Exception as e:
             return DebuggerResult(success=False, error_message=str(e))
 
+    def _load_core_auto_debuginfo(self, core_path: str) -> DebuggerResult:
+        self._core_path = os.path.realpath(core_path)
+        output = []
+
+        fbcode_path = os.environ.get("GPU_COMPARISON_FBCODE_PATH")
+        if fbcode_path:
+            result = self.execute_command(
+                f"script import sys; sys.path.insert(0, {fbcode_path!r})"
+            )
+            output.append(result.raw_output or result.error_message)
+
+        result = self.execute_command(
+            "script import os; os.environ['LD_LIBRARY_PATH'] = "
+            "os.environ.get('LLDB_SYMBOL_STORAGE_LD_LIBRARY_PATH', '')"
+        )
+        output.append(result.raw_output or result.error_message)
+
+        result = self.execute_command("command script import fblldb")
+        output.append(result.raw_output or result.error_message)
+
+        result = self.execute_command(f"auto-load-debuginfo {self._core_path}")
+        output.append(result.raw_output or result.error_message)
+        if not result.success:
+            return DebuggerResult(
+                success=False,
+                error_message=result.error_message,
+                raw_output=result.raw_output,
+                extra_data={"auto_load_output": "\n".join(output)},
+            )
+
+        self._target = self._debugger.GetSelectedTarget()
+        if not self._target or not self._target.IsValid():
+            return DebuggerResult(
+                success=False,
+                error_message="auto-load-debuginfo did not create a valid target",
+                extra_data={"auto_load_output": "\n".join(output)},
+            )
+
+        self._process = self._target.GetProcess()
+        if not self._process or not self._process.IsValid():
+            return DebuggerResult(
+                success=False,
+                error_message="auto-load-debuginfo did not create a valid process",
+                extra_data={"auto_load_output": "\n".join(output)},
+            )
+
+        return DebuggerResult(
+            success=True,
+            extra_data={
+                "thread_count": self._process.GetNumThreads(),
+                "target_triple": self._target.GetTriple(),
+                "auto_load_output": "\n".join(output),
+            },
+        )
+
     def get_all_threads(self) -> DebuggerResult:
         """Get list of all threads from all targets (CPU + GPU).
 

From 04dfd54d350a5326a7bfdee0d6e6b10da4c6dc98 Mon Sep 17 00:00:00 2001
From: Chen Li <chenlii@fb.com>
Date: Wed, 17 Jun 2026 17:23:51 -0700
Subject: [PATCH 2/2] [LLDB][AMDGPU] Refine GPU core comparison checks

---
 .../amd/TestAmdGpuCoreFileComparison.py       | 165 +++++++++++-------
 .../gpu/comparison/framework/comparator.py    |  89 +++++-----
 .../gpu/comparison/framework/gdb_driver.py    |  77 ++++++++
 .../gpu/comparison/framework/lldb_driver.py   |  87 ++++++++-
 4 files changed, 306 insertions(+), 112 deletions(-)

diff --git a/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py b/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
index 398a688181777..fd5a60f116984 100644
--- a/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
+++ b/lldb/test/API/gpu/comparison/amd/TestAmdGpuCoreFileComparison.py
@@ -161,6 +161,61 @@ def tearDown(self):
     # Comparison helpers
     # ------------------------------------------------------------------
 
+    def _select_lldb_gpu_thread_matching_rocgdb(self, gdb_driver, lldb_driver):
+        """Select LLDB's GPU wave that matches ROCgdb's selected AMDGPU wave."""
+        lldb_select_result = lldb_driver.select_gpu()
+        if not lldb_select_result.success:
+            self.skipTest(lldb_select_result.error_message)
+        if lldb_driver.get_thread_count() == 0:
+            self.skipTest("No GPU threads in LLDB")
+
+        gdb_selected = gdb_driver.get_selected_thread()
+        if not gdb_selected.success:
+            self.trace(
+                "Could not query ROCgDB selected thread; keeping LLDB default "
+                f"GPU selection: {gdb_selected.error_message}"
+            )
+            return None
+
+        selected = gdb_selected.extra_data
+        self.trace(
+            "ROCgDB selected thread: "
+            f"gdb_thread={selected.get('id')} "
+            f"arch={selected.get('architecture')} "
+            f"wave={selected.get('amdgpu_wave_id')} "
+            f"lane={selected.get('amdgpu_lane_id')} "
+            f"pc={hex(selected.get('pc') or 0)} "
+            f"function={selected.get('function')}"
+        )
+        if selected.get("selected_line"):
+            self.trace(f"ROCgDB selected thread line: {selected['selected_line']}")
+
+        wave_id = selected.get("amdgpu_wave_id")
+        if wave_id is None:
+            self.trace(
+                "ROCgDB selected thread did not expose an AMDGPU wave id; "
+                "keeping LLDB default GPU selection"
+            )
+            return None
+
+        lldb_thread = lldb_driver.select_thread(wave_id)
+        if not lldb_thread.success:
+            self.fail(
+                "LLDB failed to select the ROCGDB-selected AMDGPU wave "
+                f"{wave_id}: {lldb_thread.error_message}"
+            )
+
+        info = lldb_thread.extra_data
+        self.trace(
+            "Selected LLDB GPU thread to match ROCGDB: "
+            f"wave={wave_id} "
+            f"lldb_tid={info.get('selected_thread')} "
+            f"index={info.get('selected_index_id')} "
+            f"name={info.get('selected_name')} "
+            f"target={info.get('target_triple')}"
+        )
+        return wave_id
+
     def _compare_variable_sets(self, comparator, gdb_vars, lldb_vars):
         """Compare variable sets between GDB and LLDB.
 
@@ -244,12 +299,7 @@ def _run_register_comparison(self, core_path):
         gdb_driver, lldb_driver, comparator = self._load_core(core_path)
 
         gdb_result = gdb_driver.get_registers()
-
-        lldb_select_result = lldb_driver.select_gpu()
-        if not lldb_select_result.success:
-            self.skipTest(lldb_select_result.error_message)
-        if lldb_driver.get_thread_count() == 0:
-            self.skipTest("No GPU threads in LLDB")
+        self._select_lldb_gpu_thread_matching_rocgdb(gdb_driver, lldb_driver)
 
         lldb_result = lldb_driver.get_registers()
         self.assertTrue(
@@ -305,22 +355,17 @@ def _run_register_comparison(self, core_path):
     def _run_local_variables_comparison(self, core_path):
         """Compare GPU local variables between debuggers for a core file.
 
-        Both debuggers select the crashing thread by default when loading a core.
-        We rely on this default selection rather than searching for threads,
-        which would change GDB's selected thread state.
+        ROCgdb and ROCLLDB use different default faulting-wave selection
+        policies. Query ROCgdb's selected AMDGPU wave id, then select the
+        matching ROCLLDB GPU thread before reading LLDB locals.
         """
         gdb_driver, lldb_driver, comparator = self._load_core(core_path)
 
-        lldb_select_result = lldb_driver.select_gpu()
-        if not lldb_select_result.success:
-            self.skipTest(lldb_select_result.error_message)
-        if lldb_driver.get_thread_count() == 0:
-            self.skipTest("No GPU threads in LLDB")
-
         # Get local variables from GDB using the default selected thread.
         # IMPORTANT: Do NOT call get_all_threads() here as it changes GDB's
         # selected thread!
         gdb_vars = gdb_driver.get_local_variables()
+        self._select_lldb_gpu_thread_matching_rocgdb(gdb_driver, lldb_driver)
 
         # Get local variables from LLDB through the LLDB adapter only.
         lldb_vars = lldb_driver.get_local_variables()
@@ -417,26 +462,47 @@ def _run_module_comparison(self, core_path):
             f"GDB failed to list modules: {gdb_result.error_message}",
         )
 
-        lldb_result = self._get_lldb_combined_modules(lldb_driver)
+        lldb_result = lldb_driver.get_combined_modules()
         self.assertTrue(
             lldb_result.success,
             f"LLDB failed to list modules: {lldb_result.error_message}",
         )
 
         comparison = comparator.compare_modules(gdb_result, lldb_result)
+        gdb_normalized_modules = comparator.get_normalized_module_counts(gdb_result)
+        lldb_normalized_modules = comparator.get_normalized_module_counts(lldb_result)
 
         def fmt(mod):
             return f"{mod.name} uuid={mod.uuid or '?'}"
 
         self.trace("\n=== Module comparison ===")
-        self.trace(f"GDB modules: {len(gdb_result.modules)}")
-        self.trace(f"LLDB modules: {len(lldb_result.modules)}")
+        self.trace(f"GDB normalized modules: {len(gdb_normalized_modules)}")
+        self.trace(f"LLDB normalized modules: {len(lldb_normalized_modules)}")
+        for target_info in lldb_result.extra_data.get("targets", []):
+            target_name = target_info["name"]
+            if target_info.get("skipped"):
+                self.trace(
+                    f"\nLLDB {target_name} target modules: skipped "
+                    f"({target_info.get('error', '')})"
+                )
+                continue
+
+            target_modules = DebuggerResult(
+                success=True, modules=target_info.get("modules", [])
+            )
+            normalized_modules = comparator.get_normalized_module_counts(
+                target_modules
+            )
+            self.trace(
+                f"\nLLDB {target_name} target normalized modules: "
+                f"{len(normalized_modules)}"
+            )
         for mod in gdb_result.modules:
             self.trace(f"  GDB: {fmt(mod)}")
         for mod in lldb_result.modules:
             self.trace(f"  LLDB: {fmt(mod)}")
 
-        if not gdb_result.modules and not lldb_result.modules:
+        if not gdb_normalized_modules and not lldb_normalized_modules:
             self.skipTest(
                 "No modules reported by either debugger "
                 "(executable/debug info unavailable for this core?)"
@@ -446,16 +512,19 @@ def fmt(mod):
         gdb_only_modules = comparison.gdb_only.get("modules", [])
         lldb_only_modules = comparison.lldb_only.get("modules", [])
 
-        # These keys have already been normalized by the comparator, so one-sided
-        # entries here are actionable comparison differences.
+        # These keys have already been normalized by the comparator. GDB-only
+        # entries are failures; LLDB-only entries are expected when LLDB sees
+        # extra placeholders or file-backed GPU code objects.
         if gdb_only_modules:
             self.trace(
-                f"  GDB-only normalized module keys ({len(gdb_only_modules)}): "
+                f"  GDB-only normalized module keys missing from LLDB "
+                f"({len(gdb_only_modules)}): "
                 + ", ".join(str(m) for m in gdb_only_modules[:10])
             )
         if lldb_only_modules:
             self.trace(
-                f"  LLDB-only normalized module keys ({len(lldb_only_modules)}): "
+                f"  LLDB-extra normalized module keys allowed "
+                f"({len(lldb_only_modules)}): "
                 + ", ".join(str(m) for m in lldb_only_modules[:10])
             )
 
@@ -469,47 +538,16 @@ def fmt(mod):
         if failure_lines:
             self.fail("Module comparison failed:\n" + "\n".join(failure_lines))
 
-    def _get_lldb_combined_modules(self, lldb_driver):
-        modules = []
-        errors = []
-
-        for target_name, select in (
-            ("CPU", lldb_driver.select_cpu),
-            ("GPU", lldb_driver.select_gpu),
-        ):
-            select_result = select()
-            if not select_result.success:
-                self.trace(
-                    f"\nLLDB {target_name} target modules: skipped "
-                    f"({select_result.error_message})"
-                )
-                continue
-
-            result = lldb_driver.get_modules()
-            if not result.success:
-                errors.append(
-                    f"LLDB failed to list {target_name} target modules: "
-                    f"{result.error_message}"
-                )
-                continue
-
-            self.trace(f"\nLLDB {target_name} target modules: {len(result.modules)}")
-            modules.extend(result.modules)
-
-        if errors:
-            return DebuggerResult(success=False, error_message="\n".join(errors))
-        return DebuggerResult(success=True, modules=modules)
-
     def _run_backtrace_comparison(self, core_path):
         """Compare the faulting GPU wave's backtrace between debuggers.
 
-        Both debuggers select the faulting wave by default when loading a
-        core; rely on that selection. IMPORTANT: Do NOT call
-        get_all_threads() first as it changes GDB's selected thread!
+        ROCgdb and ROCLLDB use different default faulting-wave selection
+        policies. Query ROCgdb's selected AMDGPU wave id, then select the
+        matching ROCLLDB GPU thread before collecting the LLDB backtrace.
 
-        PCs must match unconditionally; function names are compared only
-        for frames both debuggers symbolized (production cores are often
-        unsymbolized unless debug info was loaded).
+        PCs and depth must match. Function names fail only when ROCgdb has a
+        real symbol and ROCLLDB reports it as unknown; extra ROCLLDB
+        symbolication and demangler spelling differences are diagnostics only.
         """
         gdb_driver, lldb_driver, comparator = self._load_core(
             core_path, auto_load_debuginfo=True
@@ -520,12 +558,7 @@ def _run_backtrace_comparison(self, core_path):
             gdb_result.success,
             f"GDB failed to get backtrace: {gdb_result.error_message}",
         )
-
-        select_result = lldb_driver.select_gpu()
-        if not select_result.success:
-            self.skipTest(select_result.error_message)
-        if lldb_driver.get_thread_count() == 0:
-            self.skipTest("No GPU threads in LLDB")
+        self._select_lldb_gpu_thread_matching_rocgdb(gdb_driver, lldb_driver)
 
         lldb_result = lldb_driver.get_backtrace()
         self.assertTrue(
diff --git a/lldb/test/API/gpu/comparison/framework/comparator.py b/lldb/test/API/gpu/comparison/framework/comparator.py
index c8ad889a7bd4a..684f4bb6d2659 100644
--- a/lldb/test/API/gpu/comparison/framework/comparator.py
+++ b/lldb/test/API/gpu/comparison/framework/comparator.py
@@ -2,17 +2,19 @@
 Result comparator for comparing GDB and LLDB debugging outputs.
 """
 
-from collections import Counter
-from dataclasses import dataclass, field
 import os
 import re
-from typing import List, Dict, Any, Optional, Set, Tuple
+from collections import Counter
+from dataclasses import dataclass, field
+from typing import Any
+from urllib.parse import unquote
+
 from .debugger_interface import (
     DebuggerResult,
-    ThreadInfo,
     FrameInfo,
-    VariableValue,
     ModuleInfo,
+    ThreadInfo,
+    VariableValue,
 )
 
 
@@ -32,9 +34,9 @@ class ComparisonResult:
     """Result of comparing GDB and LLDB outputs."""
 
     is_equivalent: bool = True
-    differences: List[ComparisonDifference] = field(default_factory=list)
-    gdb_only: Dict[str, List[Any]] = field(default_factory=dict)
-    lldb_only: Dict[str, List[Any]] = field(default_factory=dict)
+    differences: list[ComparisonDifference] = field(default_factory=list)
+    gdb_only: dict[str, list[Any]] = field(default_factory=dict)
+    lldb_only: dict[str, list[Any]] = field(default_factory=dict)
     summary: str = ""
 
     def add_difference(
@@ -218,13 +220,17 @@ def compare_backtrace(
                 gdb_func = gdb_frame.function
                 lldb_func = lldb_frame.function
 
-            if gdb_func != lldb_func:
+            # Names may differ by demangler; fail only when LLDB lacks GDB's symbol.
+            if gdb_func != lldb_func and (
+                not self._is_unknown_function(gdb_func)
+                and self._is_unknown_function(lldb_func)
+            ):
                 result.add_difference(
                     f"frame[{i}]",
                     "function",
                     gdb_func,
                     lldb_func,
-                    f"Frame {i} function differs: GDB='{gdb_func}', LLDB='{lldb_func}'",
+                    f"Frame {i} function missing in LLDB: GDB='{gdb_func}', LLDB='{lldb_func}'",
                 )
 
         result.summary = result.get_summary()
@@ -234,7 +240,7 @@ def compare_registers(
         self,
         gdb_result: DebuggerResult,
         lldb_result: DebuggerResult,
-        register_names: Optional[List[str]] = None,
+        register_names: list[str] | None = None,
     ) -> ComparisonResult:
         """Compare register values from GDB and LLDB."""
         result = ComparisonResult()
@@ -321,19 +327,18 @@ def compare_variables(
     def compare_modules(
         self, gdb_result: DebuggerResult, lldb_result: DebuggerResult
     ) -> ComparisonResult:
-        """Compare loaded modules from GDB and LLDB."""
+        """Compare loaded modules from GDB and LLDB.
+
+        For production GPU cores, LLDB often reports extra placeholder or
+        file-backed modules that ROCgdb omits. Treat LLDB as successful when it
+        contains every normalized ROCgdb module key. LLDB-only keys and
+        duplicate count differences are useful diagnostics, but not parity
+        failures.
+        """
         result = ComparisonResult()
 
-        gdb_modules = Counter(
-            key
-            for m in gdb_result.modules
-            if (key := self._normalize_module_key(m)) is not None
-        )
-        lldb_modules = Counter(
-            key
-            for m in lldb_result.modules
-            if (key := self._normalize_module_key(m)) is not None
-        )
+        gdb_modules = self.get_normalized_module_counts(gdb_result)
+        lldb_modules = self.get_normalized_module_counts(lldb_result)
 
         all_names = set(gdb_modules.keys()) | set(lldb_modules.keys())
 
@@ -343,13 +348,6 @@ def compare_modules(
 
             if gdb_count == 0:
                 result.add_lldb_only("modules", name)
-                result.add_difference(
-                    "modules",
-                    name,
-                    0,
-                    lldb_count,
-                    f"Module '{name}' only in LLDB",
-                )
                 continue
 
             if lldb_count == 0:
@@ -363,28 +361,24 @@ def compare_modules(
                 )
                 continue
 
-            if gdb_count != lldb_count:
-                result.add_difference(
-                    "modules",
-                    name,
-                    gdb_count,
-                    lldb_count,
-                    f"Module '{name}' count differs: GDB={gdb_count}, LLDB={lldb_count}",
-                )
-
         result.summary = result.get_summary()
         return result
 
+    def get_normalized_module_counts(
+        self, debugger_result: DebuggerResult
+    ) -> Counter[str]:
+        """Count how often each normalized module key occurs in a result.
+
+        Modules whose key normalizes to None are excluded from the tally.
+        """
+        normalized = map(self._normalize_module_key, debugger_result.modules)
+        return Counter(key for key in normalized if key is not None)
+
     def _normalize_module_key(self, module: ModuleInfo) -> str | None:
         """Normalize debugger-specific module names into comparable keys."""
         raw_path = module.path or ""
         name = module.name or os.path.basename(raw_path) or "<unknown>"
 
-        # GDB exposes .gnu_debugdata as a synthetic objfile. It is debug info
-        # for the following module, not a separately loaded module.
-        if raw_path.startswith(".gnu_debugdata for "):
-            return None
-
         if name == "[vdso]" or raw_path.startswith("system-supplied DSO"):
             return "vdso"
 
@@ -416,7 +410,7 @@ def _normalize_module_key(self, module: ModuleInfo) -> str | None:
 
         # File-backed GPU code objects may have #offset/#size suffixes on one
         # side. Keep the backing file basename as the module identity.
-        path = (raw_path or name).removeprefix("file://")
+        path = unquote(raw_path or name).removeprefix("file://")
         path = path.split("#offset=", 1)[0]
         return os.path.basename(path) or name
 
@@ -434,6 +428,13 @@ def _normalize_function_name(self, name: str) -> str:
 
         return name
 
+    def _is_unknown_function(self, name: str) -> bool:
+        """Tell whether ``name`` is empty or a debugger "no symbol" placeholder."""
+        placeholders = ("<unknown>", "??", "?? ()")
+        # Empty/None names count as unknown; otherwise compare the stripped
+        # text against the known placeholder spellings.
+        return not name or name.strip() in placeholders
+
     def normalize_pointer_value(self, value):
         """Normalize pointer value format for comparison.
 
diff --git a/lldb/test/API/gpu/comparison/framework/gdb_driver.py b/lldb/test/API/gpu/comparison/framework/gdb_driver.py
index a34f328405609..dd00b8b8720d3 100644
--- a/lldb/test/API/gpu/comparison/framework/gdb_driver.py
+++ b/lldb/test/API/gpu/comparison/framework/gdb_driver.py
@@ -431,6 +431,83 @@ def select_thread(self, thread_id: int) -> DebuggerResult:
             success=data.get("success", False), error_message=data.get("error", "")
         )
 
+    def get_selected_thread(self) -> DebuggerResult:
+        """Return ROCgdb's current selected thread without changing selection.
+
+        ROCgdb's flat GPU thread number does not match LLDB's GPU thread id.
+        For AMDGPU waves, the useful cross-debugger key is the wave id embedded
+        in names like: AMDGPU Lane 3:5:1:8192/0 (...).
+
+        Returns:
+            DebuggerResult whose extra_data carries id, name, selected_line,
+            pc, function and architecture, plus amdgpu_wave_id/amdgpu_lane_id
+            (None unless the selected thread text matches an AMDGPU lane).
+        """
+        script = r"""
+import gdb
+import json
+import re
+
+result = {"success": True, "error": ""}
+# Captures <wave>/<lane> from text such as "AMDGPU Lane 3:5:1:8192/0".
+wave_re = re.compile(r"AMDGPU\s+Lane\s+(?:\d+:){3}(\d+)/(\d+)")
+
+try:
+    thread = gdb.selected_thread()
+    frame = gdb.selected_frame()
+    current_thread_output = ""
+    try:
+        current_thread_output = gdb.execute("thread", to_string=True)
+    except Exception:
+        pass  # Best effort; the thread name and "info threads" remain.
+
+    thread_name = thread.name or ""
+    selected_line = current_thread_output.strip()
+    wave_match = wave_re.search(" ".join([thread_name, selected_line]))
+    if not wave_match:
+        info_threads = gdb.execute("info threads", to_string=True)
+        for line in info_threads.splitlines():
+            if line.lstrip().startswith("*"):
+                selected_line = line.strip()
+                break
+        wave_match = wave_re.search(selected_line)
+
+    result.update({
+        "id": thread.global_num,
+        "name": thread_name,
+        "selected_line": selected_line,
+        "pc": frame.pc(),
+        "function": frame.name() or "<unknown>",
+        "architecture": frame.architecture().name(),
+    })
+
+    if wave_match:
+        result["amdgpu_wave_id"] = int(wave_match.group(1))
+        result["amdgpu_lane_id"] = int(wave_match.group(2))
+
+except Exception as e:
+    result["success"] = False
+    result["error"] = str(e)
+
+print("RESULT_JSON:" + json.dumps(result))
+"""
+        data = self._run_python_script(script)
+
+        return DebuggerResult(
+            success=data.get("success", False),
+            error_message=data.get("error", ""),
+            extra_data={
+                "id": data.get("id"),
+                "name": data.get("name"),
+                "selected_line": data.get("selected_line"),
+                "pc": data.get("pc"),
+                "function": data.get("function"),
+                "architecture": data.get("architecture"),
+                "amdgpu_wave_id": data.get("amdgpu_wave_id"),
+                "amdgpu_lane_id": data.get("amdgpu_lane_id"),
+            },
+        )
+
     def get_backtrace(self, thread_id: Optional[int] = None) -> DebuggerResult:
         """Get backtrace for current or specified thread."""
         thread_select = f'gdb.execute("thread {thread_id}")' if thread_id else ""
diff --git a/lldb/test/API/gpu/comparison/framework/lldb_driver.py b/lldb/test/API/gpu/comparison/framework/lldb_driver.py
index f69a8ed4ae25e..95874698b2cd4 100644
--- a/lldb/test/API/gpu/comparison/framework/lldb_driver.py
+++ b/lldb/test/API/gpu/comparison/framework/lldb_driver.py
@@ -325,7 +325,27 @@ def select_thread(self, thread_id: int) -> DebuggerResult:
             if not self._process or not self._process.IsValid():
                 return DebuggerResult(success=False, error_message="No valid process")
 
-            # Search in all targets
+            # Prefer the currently selected target/process. The comparison test
+            # selects the GPU target first, and ROCgdb's AMDGPU wave id maps to
+            # LLDB's GPU thread id. Searching CPU targets first can accidentally
+            # match an unrelated host thread with the same numeric id.
+            for i in range(self._process.GetNumThreads()):
+                thread = self._process.GetThreadAtIndex(i)
+                if thread.GetThreadID() == thread_id:
+                    self._process.SetSelectedThread(thread)
+                    return DebuggerResult(
+                        success=True,
+                        extra_data={
+                            "selected_thread": thread.GetThreadID(),
+                            "selected_index_id": thread.GetIndexID(),
+                            "selected_name": thread.GetName(),
+                            "target_triple": self._target.GetTriple()
+                            if self._target and self._target.IsValid()
+                            else None,
+                        },
+                    )
+
+            # Fallback for callers that have not selected the target first.
             for target_idx in range(self._debugger.GetNumTargets()):
                 target = self._debugger.GetTargetAtIndex(target_idx)
                 process = target.GetProcess()
@@ -343,7 +363,12 @@ def select_thread(self, thread_id: int) -> DebuggerResult:
                             self._process = process
                             return DebuggerResult(
                                 success=True,
-                                extra_data={"selected_thread": thread.GetThreadID()},
+                                extra_data={
+                                    "selected_thread": thread.GetThreadID(),
+                                    "selected_index_id": thread.GetIndexID(),
+                                    "selected_name": thread.GetName(),
+                                    "target_triple": target.GetTriple(),
+                                },
                             )
 
             return DebuggerResult(
@@ -582,6 +607,64 @@ def get_modules(self) -> DebuggerResult:
         except Exception as e:
             return DebuggerResult(success=False, error_message=str(e))
 
+    def get_combined_modules(self) -> DebuggerResult:
+        """Collect the CPU and GPU target module lists into a single result.
+
+        The CPU target is queried first, then the GPU target. Modules from
+        every target that lists successfully are appended to one flat list.
+
+        A target that cannot be selected is recorded as skipped and does not
+        fail the call. A target that is selected but whose module listing
+        fails contributes an error line and makes the result unsuccessful.
+
+        Returns:
+            DebuggerResult with the flat module list in ``modules`` and one
+            dict per target under ``extra_data["targets"]``. When any listing
+            failed, ``success`` is False and ``error_message`` joins the
+            per-target failures with newlines.
+        """
+        # Look up both selectors up front, in the order they will be tried.
+        selectors = (("CPU", self.select_cpu), ("GPU", self.select_gpu))
+        combined = []
+        failures = []
+        per_target = []
+
+        for label, choose_target in selectors:
+            # Each target gets exactly one entry, filled in as we learn more.
+            entry = {"name": label}
+            per_target.append(entry)
+
+            selection = choose_target()
+            if not selection.success:
+                # Selection failed: record it as skipped, not as an error.
+                entry["skipped"] = True
+                entry["error"] = selection.error_message
+                continue
+
+            listing = self.get_modules()
+            if listing.success:
+                combined.extend(listing.modules)
+                entry["modules"] = listing.modules
+            else:
+                # Listing failed: remember why and report no modules for it.
+                failures.append(
+                    f"LLDB failed to list {label} target modules: "
+                    f"{listing.error_message}"
+                )
+                entry["error"] = listing.error_message
+                entry["modules"] = []
+
+        # Both outcomes carry the same payload; only the status fields differ.
+        payload = {"modules": combined, "extra_data": {"targets": per_target}}
+        if not failures:
+            return DebuggerResult(success=True, **payload)
+
+        return DebuggerResult(
+            success=False,
+            error_message="\n".join(failures),
+            **payload,
+        )
+
     def select_frame(self, frame_index: int) -> DebuggerResult:
         """Select a frame by index."""
         try: